Updating branches/google/stable to r176857

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/google/stable@177044 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4544f15..90062cd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,45 +15,101 @@
 # runtime libraries.
 cmake_minimum_required(VERSION 2.8.8)
 
-# FIXME: Below we assume that the target build of LLVM/Clang is x86, which is
-# not at all valid. Much of this can be fixed just by switching to use
-# a just-built-clang binary for the compiles.
+# Compute the Clang version from the LLVM version.
+# FIXME: We should be able to reuse CLANG_VERSION variable calculated
+#        in Clang cmake files, instead of copying the rules here.
+string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION
+       ${PACKAGE_VERSION})
+# Setup the paths where compiler-rt runtimes and headers should be stored.
+set(LIBCLANG_INSTALL_PATH lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION})
+string(TOLOWER ${CMAKE_SYSTEM_NAME} LIBCLANG_OS_DIR)
+set(COMPILER_RT_LIBRARY_OUTPUT_DIR 
+  ${LLVM_BINARY_DIR}/lib/clang/${CLANG_VERSION}/lib/${LIBCLANG_OS_DIR})
+set(COMPILER_RT_LIBRARY_INSTALL_DIR
+ ${LIBCLANG_INSTALL_PATH}/lib/${LIBCLANG_OS_DIR}) 
+
+# Add path for custom modules
+set(CMAKE_MODULE_PATH
+  ${CMAKE_MODULE_PATH}
+  "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules"
+  )
+include(AddCompilerRT)
+
+set(COMPILER_RT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
 # Detect whether the current target platform is 32-bit or 64-bit, and setup
 # the correct commandline flags needed to attempt to target 32-bit and 64-bit.
 if(CMAKE_SIZEOF_VOID_P EQUAL 4 OR LLVM_BUILD_32_BITS)
-  set(TARGET_X86_64_CFLAGS "-m64")
-  set(TARGET_I386_CFLAGS "")
+  set(TARGET_64_BIT_CFLAGS "-m64")
+  set(TARGET_32_BIT_CFLAGS "")
 else()
   if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
     message(FATAL_ERROR "Please use a sane architecture with 4 or 8 byte pointers.")
   endif()
-  set(TARGET_X86_64_CFLAGS "")
-  set(TARGET_I386_CFLAGS "-m32")
+  set(TARGET_64_BIT_CFLAGS "")
+  set(TARGET_32_BIT_CFLAGS "-m32")
 endif()
 
+# List of architectures we can target.
+set(COMPILER_RT_SUPPORTED_ARCH)
+
+function(get_target_flags_for_arch arch out_var)
+  list(FIND COMPILER_RT_SUPPORTED_ARCH ${arch} ARCH_INDEX)
+  if(ARCH_INDEX EQUAL -1)
+    message(FATAL_ERROR "Unsupported architecture: ${arch}")
+  else()
+    set(${out_var} ${TARGET_${arch}_CFLAGS} PARENT_SCOPE)
+  endif()
+endfunction()
+
 # Try to compile a very simple source file to ensure we can target the given
 # platform. We use the results of these tests to build only the various target
 # runtime libraries supported by our current compilers cross-compiling
 # abilities.
-set(SIMPLE_SOURCE64 ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple64.c)
-file(WRITE ${SIMPLE_SOURCE64} "#include <stdlib.h>\nint main() {}")
-try_compile(CAN_TARGET_X86_64 ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE64}
-            COMPILE_DEFINITIONS "${TARGET_X86_64_CFLAGS}"
-            CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${TARGET_X86_64_CFLAGS}")
+set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.c)
+file(WRITE ${SIMPLE_SOURCE} "#include <stdlib.h>\nint main() {}")
 
-set(SIMPLE_SOURCE32 ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple32.c)
-file(WRITE ${SIMPLE_SOURCE32} "#include <stdlib.h>\nint main() {}")
-try_compile(CAN_TARGET_I386 ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE32}
-            COMPILE_DEFINITIONS "${TARGET_I386_CFLAGS}"
-            CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${TARGET_I386_CFLAGS}")
+# test_target_arch(<arch> <target flags...>)
+# Sets the target flags for a given architecture and determines if this
+# architecture is supported by trying to build a simple file.
+macro(test_target_arch arch)
+  set(TARGET_${arch}_CFLAGS ${ARGN})
+  try_compile(CAN_TARGET_${arch} ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE}
+              COMPILE_DEFINITIONS "${TARGET_${arch}_CFLAGS}"
+              CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${TARGET_${arch}_CFLAGS}")
+  if(${CAN_TARGET_${arch}})
+    list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
+  endif()
+endmacro()
+
+if("${LLVM_NATIVE_ARCH}" STREQUAL "X86")
+  test_target_arch(x86_64 ${TARGET_64_BIT_CFLAGS})
+  test_target_arch(i386 ${TARGET_32_BIT_CFLAGS})
+elseif("${LLVM_NATIVE_ARCH}" STREQUAL "PowerPC")
+  # Explicitly set -m flag on powerpc, because on ppc64 defaults for gcc and
+  # clang are different.
+  test_target_arch(powerpc64 "-m64")
+  test_target_arch(powerpc "-m32")
+endif()
+
+# We only support running instrumented tests when we're not cross compiling
+# and target a unix-like system. On Android we define the rules for building
+# unit tests, but don't execute them.
+if("${CMAKE_HOST_SYSTEM}" STREQUAL "${CMAKE_SYSTEM}" AND UNIX AND NOT ANDROID)
+  set(COMPILER_RT_CAN_EXECUTE_TESTS TRUE)
+else()
+  set(COMPILER_RT_CAN_EXECUTE_TESTS FALSE)
+endif()
+    
+# Check if compiler-rt is built with libc++.
+find_flag_in_string("${CMAKE_CXX_FLAGS}" "-stdlib=libc++"
+                    COMPILER_RT_USES_LIBCXX)
 
 function(filter_available_targets out_var)
   set(archs)
   foreach(arch ${ARGN})
-    if(${arch} STREQUAL "x86_64" AND CAN_TARGET_X86_64)
-      list(APPEND archs ${arch})
-    elseif (${arch} STREQUAL "i386" AND CAN_TARGET_I386)
+    list(FIND COMPILER_RT_SUPPORTED_ARCH ${arch} ARCH_INDEX)
+    if(NOT (ARCH_INDEX EQUAL -1) AND CAN_TARGET_${arch})
       list(APPEND archs ${arch})
     endif()
   endforeach()
@@ -88,35 +144,55 @@
 if(SUPPORTS_NO_C99_EXTENSIONS_FLAG)
   list(APPEND SANITIZER_COMMON_CFLAGS -Wno-c99-extensions)
 endif()
+
+# Setup min Mac OS X version.
 if(APPLE)
-  list(APPEND SANITIZER_COMMON_CFLAGS -mmacosx-version-min=10.5)
+  if(COMPILER_RT_USES_LIBCXX)
+    set(SANITIZER_MIN_OSX_VERSION 10.7)
+  else()
+    set(SANITIZER_MIN_OSX_VERSION 10.5)
+  endif()
+  list(APPEND SANITIZER_COMMON_CFLAGS
+    -mmacosx-version-min=${SANITIZER_MIN_OSX_VERSION})
 endif()
 
-# Because compiler-rt spends a lot of time setting up custom compile flags,
-# define a handy helper function for it. The compile flags setting in CMake
-# has serious issues that make its syntax challenging at best.
-function(set_target_compile_flags target)
-  foreach(arg ${ARGN})
-    set(argstring "${argstring} ${arg}")
-  endforeach()
-  set_property(TARGET ${target} PROPERTY COMPILE_FLAGS "${argstring}")
-endfunction()
+# Architectures supported by Sanitizer runtimes. Specific sanitizers may
+# support only a subset of these (e.g. TSan works on x86_64 only).
+filter_available_targets(SANITIZER_COMMON_SUPPORTED_ARCH
+  x86_64 i386 powerpc64 powerpc)
 
-function(set_target_link_flags target)
-  foreach(arg ${ARGN})
-    set(argstring "${argstring} ${arg}")
-  endforeach()
-  set_property(TARGET ${target} PROPERTY LINK_FLAGS "${argstring}")
-endfunction()
+file(GLOB_RECURSE COMPILER_RT_HEADERS
+  RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/include"
+  "include/*.h")
 
-# Compute the Clang version from the LLVM version.
-# FIXME: We should be able to reuse CLANG_VERSION variable calculated
-#        in Clang cmake files, instead of copying the rules here.
-string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION
-       ${PACKAGE_VERSION})
-# Setup the paths where compiler-rt runtimes and headers should be stored.
-set(LIBCLANG_INSTALL_PATH lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION})
-string(TOLOWER ${CMAKE_SYSTEM_NAME} LIBCLANG_OS_DIR)
+set(output_dir ${LLVM_BINARY_DIR}/lib/clang/${CLANG_VERSION}/include)
+
+if(MSVC_IDE OR XCODE)
+   set(other_output_dir ${LLVM_BINARY_DIR}/bin/lib/clang/${CLANG_VERSION}/include)
+endif()
+
+# Copy compiler-rt headers to the build tree.
+set(out_files)
+foreach( f ${COMPILER_RT_HEADERS} )
+  set( src ${CMAKE_CURRENT_SOURCE_DIR}/include/${f} )
+  set( dst ${output_dir}/${f} )
+  add_custom_command(OUTPUT ${dst}
+    DEPENDS ${src}
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst}
+    COMMENT "Copying compiler-rt's ${f}...")
+  list(APPEND out_files ${dst})
+
+  if(other_output_dir)
+   set(other_dst ${other_output_dir}/${f})
+    add_custom_command(OUTPUT ${other_dst}
+      DEPENDS ${src}
+      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${other_dst}
+      COMMENT "Copying compiler-rt's ${f}...")    
+    list(APPEND out_files ${other_dst})
+  endif()
+endforeach( f )
+
+add_custom_target(compiler-rt-headers ALL DEPENDS ${out_files})
 
 # Install compiler-rt headers.
 install(DIRECTORY include/
@@ -126,23 +202,6 @@
   PATTERN ".svn" EXCLUDE
   )
 
-# Call add_clang_compiler_rt_libraries to make sure that targets are built
-# and installed in the directories where Clang driver expects to find them.
-macro(add_clang_compiler_rt_libraries)
-  # Setup output directories so that clang in build tree works.
-  set_target_properties(${ARGN} PROPERTIES
-    ARCHIVE_OUTPUT_DIRECTORY
-      ${LLVM_BINARY_DIR}/lib/clang/${CLANG_VERSION}/lib/${LIBCLANG_OS_DIR}
-    LIBRARY_OUTPUT_DIRECTORY
-      ${LLVM_BINARY_DIR}/lib/clang/${CLANG_VERSION}/lib/${LIBCLANG_OS_DIR}
-    )
-  # Add installation command.
-  install(TARGETS ${ARGN}
-    ARCHIVE DESTINATION ${LIBCLANG_INSTALL_PATH}/lib/${LIBCLANG_OS_DIR}
-    LIBRARY DESTINATION ${LIBCLANG_INSTALL_PATH}/lib/${LIBCLANG_OS_DIR}
-    )
-endmacro(add_clang_compiler_rt_libraries)
-
 # Add the public header's directory to the includes for all of compiler-rt.
 include_directories(include)
 
diff --git a/LICENSE.TXT b/LICENSE.TXT
index f717942..6aab1f6 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -14,7 +14,7 @@
 University of Illinois/NCSA
 Open Source License
 
-Copyright (c) 2009-2012 by the contributors listed in CREDITS.TXT
+Copyright (c) 2009-2013 by the contributors listed in CREDITS.TXT
 
 All rights reserved.
 
@@ -55,7 +55,7 @@
 
 ==============================================================================
 
-Copyright (c) 2009-2012 by the contributors listed in CREDITS.TXT
+Copyright (c) 2009-2013 by the contributors listed in CREDITS.TXT
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/cmake/Modules/AddCompilerRT.cmake b/cmake/Modules/AddCompilerRT.cmake
new file mode 100644
index 0000000..742d81f
--- /dev/null
+++ b/cmake/Modules/AddCompilerRT.cmake
@@ -0,0 +1,123 @@
+include(AddLLVM)
+include(LLVMParseArguments)
+include(CompilerRTUtils)
+
+# Adds an "object library" target named "<name>.<arch>" for the given
+# architecture; aborts configuration (FATAL_ERROR) if it can't be targeted.
+# add_compiler_rt_object_library(<name> <arch>
+#                                SOURCES <source files>
+#                                CFLAGS <compile flags>)
+macro(add_compiler_rt_object_library name arch)
+  if(CAN_TARGET_${arch})
+    parse_arguments(LIB "SOURCES;CFLAGS" "" ${ARGN})
+    add_library(${name}.${arch} OBJECT ${LIB_SOURCES})
+    set_target_compile_flags(${name}.${arch}
+      ${TARGET_${arch}_CFLAGS} ${LIB_CFLAGS})
+  else()
+    message(FATAL_ERROR "Architecture ${arch} can't be targeted")
+  endif()
+endmacro()
+
+# Same as above, but adds universal osx library with name "<name>.osx"
+# targeting multiple architectures.
+# add_compiler_rt_osx_object_library(<name> ARCH <architectures>
+#                                           SOURCES <source files>
+#                                           CFLAGS <compile flags>)
+macro(add_compiler_rt_osx_object_library name)
+  parse_arguments(LIB "ARCH;SOURCES;CFLAGS" "" ${ARGN})
+  set(libname "${name}.osx")
+  add_library(${libname} OBJECT ${LIB_SOURCES})
+  set_target_compile_flags(${libname} ${LIB_CFLAGS})
+  set_target_properties(${libname} PROPERTIES OSX_ARCHITECTURES "${LIB_ARCH}")
+endmacro()
+
+# Adds a static runtime for <arch> and puts it in the proper build-tree and
+# install-tree directories; aborts (FATAL_ERROR) if <arch> can't be targeted.
+# add_compiler_rt_static_runtime(<name> <arch>
+#                                SOURCES <source files>
+#                                CFLAGS <compile flags>
+#                                DEFS <compile definitions>)
+macro(add_compiler_rt_static_runtime name arch)
+  if(CAN_TARGET_${arch})
+    parse_arguments(LIB "SOURCES;CFLAGS;DEFS" "" ${ARGN})
+    add_library(${name} STATIC ${LIB_SOURCES})
+    # Setup compile flags and definitions.
+    set_target_compile_flags(${name}
+      ${TARGET_${arch}_CFLAGS} ${LIB_CFLAGS})
+    set_property(TARGET ${name} APPEND PROPERTY
+      COMPILE_DEFINITIONS ${LIB_DEFS})
+    # Setup correct output directory in the build tree.
+    set_target_properties(${name} PROPERTIES
+      ARCHIVE_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
+    # Add installation command.
+    install(TARGETS ${name}
+      ARCHIVE DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
+  else()
+    message(FATAL_ERROR "Architecture ${arch} can't be targeted")
+  endif()
+endmacro()
+
+# Same as add_compiler_rt_static_runtime, but creates a universal library
+# for several architectures.
+# add_compiler_rt_osx_static_runtime(<name> ARCH <architectures>
+#                                    SOURCES <source files>
+#                                    CFLAGS <compile flags>
+#                                    DEFS <compile definitions>)
+macro(add_compiler_rt_osx_static_runtime name)
+  parse_arguments(LIB "ARCH;SOURCES;CFLAGS;DEFS" "" ${ARGN})
+  add_library(${name} STATIC ${LIB_SOURCES})
+  set_target_compile_flags(${name} ${LIB_CFLAGS})
+  set_property(TARGET ${name} APPEND PROPERTY
+    COMPILE_DEFINITIONS ${LIB_DEFS})
+  set_target_properties(${name} PROPERTIES
+    OSX_ARCHITECTURES "${LIB_ARCH}"
+    ARCHIVE_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
+  install(TARGETS ${name}
+    ARCHIVE DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
+endmacro()
+
+# Adds dynamic runtime library on osx, which supports multiple architectures.
+# add_compiler_rt_osx_dynamic_runtime(<name> ARCH <architectures>
+#                                     SOURCES <source files>
+#                                     CFLAGS <compile flags>
+#                                     DEFS <compile definitions>
+#                                     LINKFLAGS <link flags>)
+macro(add_compiler_rt_osx_dynamic_runtime name)
+  parse_arguments(LIB "ARCH;SOURCES;CFLAGS;DEFS;LINKFLAGS" "" ${ARGN})
+  add_library(${name} SHARED ${LIB_SOURCES})
+  set_target_compile_flags(${name} ${LIB_CFLAGS})
+  set_target_link_flags(${name} ${LIB_LINKFLAGS})
+  set_property(TARGET ${name} APPEND PROPERTY
+    COMPILE_DEFINITIONS ${LIB_DEFS})
+  set_target_properties(${name} PROPERTIES
+    OSX_ARCHITECTURES "${LIB_ARCH}"
+    LIBRARY_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
+  install(TARGETS ${name}
+    LIBRARY DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
+endmacro()
+
+# Unittests support.
+set(COMPILER_RT_GTEST_PATH ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest)
+set(COMPILER_RT_GTEST_SOURCE ${COMPILER_RT_GTEST_PATH}/gtest-all.cc)
+set(COMPILER_RT_GTEST_INCLUDE_CFLAGS
+  -DGTEST_NO_LLVM_RAW_OSTREAM=1
+  -I${COMPILER_RT_GTEST_PATH}/include
+)
+
+# Use Clang to link objects into a single executable with just-built
+# Clang, using specific link flags. Make executable a part of provided
+# test_suite.
+# add_compiler_rt_test(<test_suite> <test_name>
+#                      OBJECTS <object files>
+#                      DEPS <deps (e.g. runtime libs)>
+#                      LINK_FLAGS <link flags>)
+macro(add_compiler_rt_test test_suite test_name)
+  parse_arguments(TEST "OBJECTS;DEPS;LINK_FLAGS" "" ${ARGN})
+  set(output_bin "${CMAKE_CURRENT_BINARY_DIR}/${test_name}")
+  add_custom_target(${test_name}
+    COMMAND clang ${TEST_OBJECTS} -o "${output_bin}"
+            ${TEST_LINK_FLAGS}
+    DEPENDS clang ${TEST_DEPS})
+  # Make the test suite depend on the binary.
+  add_dependencies(${test_suite} ${test_name})
+endmacro()
diff --git a/cmake/Modules/CompilerRTCompile.cmake b/cmake/Modules/CompilerRTCompile.cmake
new file mode 100644
index 0000000..2794cab
--- /dev/null
+++ b/cmake/Modules/CompilerRTCompile.cmake
@@ -0,0 +1,16 @@
+include(LLVMParseArguments)
+
+# Compile a source into an object file with just-built Clang using
+# the provided compile flags and dependencies.
+# clang_compile(<object> <source>
+#               CFLAGS <list of compile flags>
+#               DEPS <list of dependencies>)
+macro(clang_compile object_file source)
+  parse_arguments(SOURCE "CFLAGS;DEPS" "" ${ARGN})
+  get_filename_component(source_rpath ${source} REALPATH)
+  add_custom_command(
+    OUTPUT ${object_file}
+    COMMAND clang ${SOURCE_CFLAGS} -c -o "${object_file}" ${source_rpath}
+    MAIN_DEPENDENCY ${source}
+    DEPENDS clang ${SOURCE_DEPS})
+endmacro()
diff --git a/cmake/Modules/CompilerRTLink.cmake b/cmake/Modules/CompilerRTLink.cmake
new file mode 100644
index 0000000..85030a7
--- /dev/null
+++ b/cmake/Modules/CompilerRTLink.cmake
@@ -0,0 +1,14 @@
+include(LLVMParseArguments)
+
+# Link a shared library with just-built Clang.
+# clang_link_shared(<output.so>
+#                   OBJECTS <list of input objects>
+#                   LINKFLAGS <list of link flags>
+#                   DEPS <list of dependencies>)
+macro(clang_link_shared so_file)
+  parse_arguments(SOURCE "OBJECTS;LINKFLAGS;DEPS" "" ${ARGN})
+  add_custom_command(
+    OUTPUT ${so_file}
+    COMMAND clang -o "${so_file}" -shared ${SOURCE_LINKFLAGS} ${SOURCE_OBJECTS}
+    DEPENDS clang ${SOURCE_DEPS})
+endmacro()
diff --git a/cmake/Modules/CompilerRTUtils.cmake b/cmake/Modules/CompilerRTUtils.cmake
new file mode 100644
index 0000000..f9760f4
--- /dev/null
+++ b/cmake/Modules/CompilerRTUtils.cmake
@@ -0,0 +1,28 @@
+# Because compiler-rt spends a lot of time setting up custom compile flags,
+# define a handy helper function for it. The compile flags setting in CMake
+# has serious issues that make its syntax challenging at best.
+function(set_target_compile_flags target)
+  foreach(arg ${ARGN})
+    set(argstring "${argstring} ${arg}")
+  endforeach()
+  set_property(TARGET ${target} PROPERTY COMPILE_FLAGS "${argstring}")
+endfunction()
+
+function(set_target_link_flags target)
+  foreach(arg ${ARGN})
+    set(argstring "${argstring} ${arg}")
+  endforeach()
+  set_property(TARGET ${target} PROPERTY LINK_FLAGS "${argstring}")
+endfunction()
+
+# Set out_var to TRUE if flag occurs in the space-separated flag_string, else
+# FALSE. Expansions are quoted so an empty flag_string stays a real argument.
+function(find_flag_in_string flag_string flag out_var)
+  string(REPLACE " " ";" flag_list "${flag_string}")
+  list(FIND flag_list "${flag}" flag_pos)
+  if(NOT flag_pos EQUAL -1)
+    set(${out_var} TRUE PARENT_SCOPE)
+  else()
+    set(${out_var} FALSE PARENT_SCOPE)
+  endif()
+endfunction()
diff --git a/include/sanitizer/asan_interface.h b/include/sanitizer/asan_interface.h
index 5b6a909..8adf3f1 100644
--- a/include/sanitizer/asan_interface.h
+++ b/include/sanitizer/asan_interface.h
@@ -7,74 +7,18 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file is a part of AddressSanitizer, an address sanity checker.
+// This file is a part of AddressSanitizer.
 //
-// This header can be included by the instrumented program to fetch
-// data (mostly allocator statistics) from ASan runtime library.
+// Public interface header.
 //===----------------------------------------------------------------------===//
 #ifndef SANITIZER_ASAN_INTERFACE_H
 #define SANITIZER_ASAN_INTERFACE_H
 
 #include <sanitizer/common_interface_defs.h>
 
-// ----------- ATTENTION -------------
-// This header should NOT include any other headers from ASan runtime.
-// All functions in this header are extern "C" and start with __asan_.
-
-using __sanitizer::uptr;
-
+#ifdef __cplusplus
 extern "C" {
-  // This function should be called at the very beginning of the process,
-  // before any instrumented code is executed and before any call to malloc.
-  void __asan_init() SANITIZER_INTERFACE_ATTRIBUTE;
-
-  // This function should be called by the instrumented code.
-  // 'addr' is the address of a global variable called 'name' of 'size' bytes.
-  void __asan_register_global(uptr addr, uptr size, const char *name)
-      SANITIZER_INTERFACE_ATTRIBUTE;
-
-  // This structure describes an instrumented global variable.
-  struct __asan_global {
-    uptr beg;                // The address of the global.
-    uptr size;               // The original size of the global.
-    uptr size_with_redzone;  // The size with the redzone.
-    const char *name;        // Name as a C string.
-    uptr has_dynamic_init;   // Non-zero if the global has dynamic initializer.
-  };
-
-  // These two functions should be called by the instrumented code.
-  // 'globals' is an array of structures describing 'n' globals.
-  void __asan_register_globals(__asan_global *globals, uptr n)
-      SANITIZER_INTERFACE_ATTRIBUTE;
-  void __asan_unregister_globals(__asan_global *globals, uptr n)
-      SANITIZER_INTERFACE_ATTRIBUTE;
-
-  // These two functions should be called before and after dynamic initializers
-  // run, respectively.  They should be called with parameters describing all
-  // dynamically initialized globals defined in the calling TU.
-  void __asan_before_dynamic_init(uptr first_addr, uptr last_addr)
-      SANITIZER_INTERFACE_ATTRIBUTE;
-  void __asan_after_dynamic_init()
-      SANITIZER_INTERFACE_ATTRIBUTE;
-
-  // These two functions are used by the instrumented code in the
-  // use-after-return mode. __asan_stack_malloc allocates size bytes of
-  // fake stack and __asan_stack_free poisons it. real_stack is a pointer to
-  // the real stack region.
-  uptr __asan_stack_malloc(uptr size, uptr real_stack)
-      SANITIZER_INTERFACE_ATTRIBUTE;
-  void __asan_stack_free(uptr ptr, uptr size, uptr real_stack)
-      SANITIZER_INTERFACE_ATTRIBUTE;
-
-  // These two functions are used by instrumented code in the
-  // use-after-scope mode. They mark memory for local variables as
-  // unaddressable when they leave scope and addressable before the
-  // function exits.
-  void __asan_poison_stack_memory(uptr addr, uptr size)
-      SANITIZER_INTERFACE_ATTRIBUTE;
-  void __asan_unpoison_stack_memory(uptr addr, uptr size)
-      SANITIZER_INTERFACE_ATTRIBUTE;
-
+#endif
   // Marks memory region [addr, addr+size) as unaddressable.
   // This memory must be previously allocated by the user program. Accessing
   // addresses in this region from instrumented code is forbidden until
@@ -83,8 +27,7 @@
   // to ASan alignment restrictions.
   // Method is NOT thread-safe in the sense that no two threads can
   // (un)poison memory in the same memory region simultaneously.
-  void __asan_poison_memory_region(void const volatile *addr, uptr size)
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_poison_memory_region(void const volatile *addr, size_t size);
   // Marks memory region [addr, addr+size) as addressable.
   // This memory must be previously allocated by the user program. Accessing
   // addresses in this region is allowed until this region is poisoned again.
@@ -92,15 +35,10 @@
   // ASan alignment restrictions.
   // Method is NOT thread-safe in the sense that no two threads can
   // (un)poison memory in the same memory region simultaneously.
-  void __asan_unpoison_memory_region(void const volatile *addr, uptr size)
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
 
-  // Performs cleanup before a NoReturn function. Must be called before things
-  // like _exit and execl to avoid false positives on stack.
-  void __asan_handle_no_return() SANITIZER_INTERFACE_ATTRIBUTE;
-
-// User code should use macro instead of functions.
-#if __has_feature(address_sanitizer)
+// User code should use macros instead of functions.
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
 #define ASAN_POISON_MEMORY_REGION(addr, size) \
   __asan_poison_memory_region((addr), (size))
 #define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
@@ -114,95 +52,86 @@
 
   // Returns true iff addr is poisoned (i.e. 1-byte read/write access to this
   // address will result in error report from AddressSanitizer).
-  bool __asan_address_is_poisoned(void const volatile *addr)
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  bool __asan_address_is_poisoned(void const volatile *addr);
+
+  // If at least one byte in [beg, beg+size) is poisoned, return the address
+  // of the first such byte. Otherwise return 0.
+  void *__asan_region_is_poisoned(void *beg, size_t size);
+
+  // Print the description of addr (useful when debugging in gdb).
+  void __asan_describe_address(void *addr);
 
   // This is an internal function that is called to report an error.
   // However it is still a part of the interface because users may want to
   // set a breakpoint on this function in a debugger.
-  void __asan_report_error(uptr pc, uptr bp, uptr sp,
-                           uptr addr, bool is_write, uptr access_size)
-    SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_report_error(void *pc, void *bp, void *sp,
+                           void *addr, bool is_write, size_t access_size);
 
   // Sets the exit code to use when reporting an error.
   // Returns the old value.
-  int __asan_set_error_exit_code(int exit_code)
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  int __asan_set_error_exit_code(int exit_code);
 
   // Sets the callback to be called right before death on error.
   // Passing 0 will unset the callback.
-  void __asan_set_death_callback(void (*callback)(void))
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_set_death_callback(void (*callback)(void));
 
-  void __asan_set_error_report_callback(void (*callback)(const char*))
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_set_error_report_callback(void (*callback)(const char*));
 
   // User may provide function that would be called right when ASan detects
   // an error. This can be used to notice cases when ASan detects an error, but
   // the program crashes before ASan report is printed.
-  /* OPTIONAL */ void __asan_on_error()
-      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_on_error();
 
   // User may provide its own implementation for symbolization function.
   // It should print the description of instruction at address "pc" to
   // "out_buffer". Description should be at most "out_size" bytes long.
   // User-specified function should return true if symbolization was
   // successful.
-  /* OPTIONAL */ bool __asan_symbolize(const void *pc, char *out_buffer,
-                                       int out_size)
-      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+  bool __asan_symbolize(const void *pc, char *out_buffer,
+                                       int out_size);
 
   // Returns the estimated number of bytes that will be reserved by allocator
   // for request of "size" bytes. If ASan allocator can't allocate that much
   // memory, returns the maximal possible allocation size, otherwise returns
   // "size".
-  uptr __asan_get_estimated_allocated_size(uptr size)
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  size_t __asan_get_estimated_allocated_size(size_t size);
   // Returns true if p was returned by the ASan allocator and
   // is not yet freed.
-  bool __asan_get_ownership(const void *p)
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  bool __asan_get_ownership(const void *p);
   // Returns the number of bytes reserved for the pointer p.
   // Requires (get_ownership(p) == true) or (p == 0).
-  uptr __asan_get_allocated_size(const void *p)
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  size_t __asan_get_allocated_size(const void *p);
   // Number of bytes, allocated and not yet freed by the application.
-  uptr __asan_get_current_allocated_bytes()
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  size_t __asan_get_current_allocated_bytes();
   // Number of bytes, mmaped by asan allocator to fulfill allocation requests.
   // Generally, for request of X bytes, allocator can reserve and add to free
   // lists a large number of chunks of size X to use them for future requests.
   // All these chunks count toward the heap size. Currently, allocator never
   // releases memory to OS (instead, it just puts freed chunks to free lists).
-  uptr __asan_get_heap_size()
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  size_t __asan_get_heap_size();
   // Number of bytes, mmaped by asan allocator, which can be used to fulfill
   // allocation requests. When a user program frees memory chunk, it can first
   // fall into quarantine and will count toward __asan_get_free_bytes() later.
-  uptr __asan_get_free_bytes()
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  size_t __asan_get_free_bytes();
   // Number of bytes in unmapped pages, that are released to OS. Currently,
   // always returns 0.
-  uptr __asan_get_unmapped_bytes()
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  size_t __asan_get_unmapped_bytes();
   // Prints accumulated stats to stderr. Used for debugging.
-  void __asan_print_accumulated_stats()
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_print_accumulated_stats();
 
   // This function may be optionally provided by user and should return
   // a string containing ASan runtime options. See asan_flags.h for details.
-  /* OPTIONAL */ const char* __asan_default_options()
-      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+  const char* __asan_default_options();
 
   // Malloc hooks that may be optionally provided by user.
   // __asan_malloc_hook(ptr, size) is called immediately after
   //   allocation of "size" bytes, which returned "ptr".
   // __asan_free_hook(ptr) is called immediately before
   //   deallocation of "ptr".
-  /* OPTIONAL */ void __asan_malloc_hook(void *ptr, uptr size)
-      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
-  /* OPTIONAL */ void __asan_free_hook(void *ptr)
-      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_malloc_hook(void *ptr, size_t size);
+  void __asan_free_hook(void *ptr);
+#ifdef __cplusplus
 }  // extern "C"
+#endif
 
 #endif  // SANITIZER_ASAN_INTERFACE_H
diff --git a/include/sanitizer/common_interface_defs.h b/include/sanitizer/common_interface_defs.h
index 9d8fa55..74e3b22 100644
--- a/include/sanitizer/common_interface_defs.h
+++ b/include/sanitizer/common_interface_defs.h
@@ -7,86 +7,42 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file is shared between AddressSanitizer and ThreadSanitizer.
-// It contains basic macro and types.
-// NOTE: This file may be included into user code.
+// Common part of the public sanitizer interface.
 //===----------------------------------------------------------------------===//
 
 #ifndef SANITIZER_COMMON_INTERFACE_DEFS_H
 #define SANITIZER_COMMON_INTERFACE_DEFS_H
 
-// ----------- ATTENTION -------------
-// This header should NOT include any other headers to avoid portability issues.
+#include <stddef.h>
+#include <stdint.h>
 
-#if defined(_WIN32)
-// FIXME find out what we need on Windows. __declspec(dllexport) ?
-# define SANITIZER_INTERFACE_ATTRIBUTE
-# define SANITIZER_WEAK_ATTRIBUTE
-#elif defined(SANITIZER_GO)
-# define SANITIZER_INTERFACE_ATTRIBUTE
-# define SANITIZER_WEAK_ATTRIBUTE
-#else
-# define SANITIZER_INTERFACE_ATTRIBUTE __attribute__((visibility("default")))
-# define SANITIZER_WEAK_ATTRIBUTE  __attribute__((weak))
-#endif
-
-#ifdef __linux__
-# define SANITIZER_SUPPORTS_WEAK_HOOKS 1
-#else
-# define SANITIZER_SUPPORTS_WEAK_HOOKS 0
-#endif
-
-// __has_feature
+// GCC does not understand __has_feature.
 #if !defined(__has_feature)
 # define __has_feature(x) 0
 #endif
 
-// For portability reasons we do not include stddef.h, stdint.h or any other
-// system header, but we do need some basic types that are not defined
-// in a portable way by the language itself.
-namespace __sanitizer {
-
-#if defined(_WIN64)
-// 64-bit Windows uses LLP64 data model.
-typedef unsigned long long uptr;  // NOLINT
-typedef signed   long long sptr;  // NOLINT
-#else
-typedef unsigned long uptr;  // NOLINT
-typedef signed   long sptr;  // NOLINT
-#endif  // defined(_WIN64)
-#if defined(__x86_64__)
-// Since x32 uses ILP32 data model in 64-bit hardware mode,  we must use
-// 64-bit pointer to unwind stack frame.
-typedef unsigned long long uhwptr;  // NOLINT
-#else
-typedef uptr uhwptr;   // NOLINT
-#endif
-typedef unsigned char u8;
-typedef unsigned short u16;  // NOLINT
-typedef unsigned int u32;
-typedef unsigned long long u64;  // NOLINT
-typedef signed   char s8;
-typedef signed   short s16;  // NOLINT
-typedef signed   int s32;
-typedef signed   long long s64;  // NOLINT
-
-}  // namespace __sanitizer
-
+#ifdef __cplusplus
 extern "C" {
+#endif
   // Tell the tools to write their reports to "path.<pid>" instead of stderr.
-  void __sanitizer_set_report_path(const char *path)
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __sanitizer_set_report_path(const char *path);
 
   // Tell the tools to write their reports to given file descriptor instead of
   // stderr.
-  void __sanitizer_set_report_fd(int fd)
-      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __sanitizer_set_report_fd(int fd);
 
   // Notify the tools that the sandbox is going to be turned on. The reserved
   // parameter will be used in the future to hold a structure with functions
   // that the tools may call to bypass the sandbox.
-  void __sanitizer_sandbox_on_notify(void *reserved)
-      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+  void __sanitizer_sandbox_on_notify(void *reserved);
+
+  // This function is called by the tool when it has just finished reporting
+  // an error. 'error_summary' is a one-line string that summarizes
+  // the error message. This function can be overridden by the client.
+  void __sanitizer_report_error_summary(const char *error_summary);
+
+#ifdef __cplusplus
 }  // extern "C"
+#endif
 
 #endif  // SANITIZER_COMMON_INTERFACE_DEFS_H
diff --git a/include/sanitizer/msan_interface.h b/include/sanitizer/msan_interface.h
new file mode 100644
index 0000000..ceba661
--- /dev/null
+++ b/include/sanitizer/msan_interface.h
@@ -0,0 +1,105 @@
+//===-- msan_interface.h --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// Public interface header.
+//===----------------------------------------------------------------------===//
+#ifndef MSAN_INTERFACE_H
+#define MSAN_INTERFACE_H
+
+#include <sanitizer/common_interface_defs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if __has_feature(memory_sanitizer)
+  /* Returns a string describing a stack origin.
+     Returns NULL if the origin is invalid, or is not a stack origin. */
+  const char *__msan_get_origin_descr_if_stack(uint32_t id);
+
+  /* Set raw origin for the memory range. */
+  void __msan_set_origin(void *a, size_t size, uint32_t origin);
+
+  /* Get raw origin for an address. */
+  uint32_t __msan_get_origin(void *a);
+
+  /* Returns non-zero if tracking origins. */
+  int __msan_get_track_origins(void);
+
+  /* Returns the origin id of the latest UMR in the calling thread. */
+  uint32_t __msan_get_umr_origin(void);
+
+  /* Make memory region fully initialized (without changing its contents). */
+  void __msan_unpoison(void *a, size_t size);
+
+  /* Make memory region fully uninitialized (without changing its contents). */
+  void __msan_poison(void *a, size_t size);
+
+  /* Make memory region partially uninitialized (without changing its contents).
+   */
+  void __msan_partial_poison(void* data, void* shadow, size_t size);
+
+  /* Returns the offset of the first (at least partially) poisoned byte in the
+     memory range, or -1 if the whole range is good. */
+  intptr_t __msan_test_shadow(const void *x, size_t size);
+
+  /* Set exit code when error(s) were detected.
+     Value of 0 means don't change the program exit code. */
+  void __msan_set_exit_code(int exit_code);
+
+  /* For testing:
+     __msan_set_expect_umr(1);
+     ... some buggy code ...
+     __msan_set_expect_umr(0);
+     The last line will verify that a UMR happened. */
+  void __msan_set_expect_umr(int expect_umr);
+
+  /* Print shadow and origin for the memory range to stdout in a human-readable
+     format. */
+  void __msan_print_shadow(const void *x, size_t size);
+
+  /* Print current function arguments shadow and origin to stdout in a
+     human-readable format. */
+  void __msan_print_param_shadow(void);
+
+  /* Returns non-zero if running under a dynamic tool (DynamoRIO-based). */
+  int __msan_has_dynamic_component(void);
+
+  /* Tell MSan about newly allocated memory (e.g. by a custom allocator).
+     Memory will be marked uninitialized, with origin at the call site. */
+  void __msan_allocated_memory(void* data, size_t size);
+
+#else  // __has_feature(memory_sanitizer)
+
+/* Without MSan: no-op macros with fixed results; arguments are unevaluated. */
+#define __msan_get_origin_descr_if_stack(id) ((const char*)0)
+#define __msan_set_origin(a, size, origin)
+#define __msan_get_origin(a) ((uint32_t)-1)
+#define __msan_get_track_origins() (0)
+#define __msan_get_umr_origin() ((uint32_t)-1)
+#define __msan_unpoison(a, size)
+#define __msan_poison(a, size)
+#define __msan_partial_poison(data, shadow, size)
+#define __msan_test_shadow(x, size) ((intptr_t)-1)
+#define __msan_set_exit_code(exit_code)
+#define __msan_set_expect_umr(expect_umr)
+#define __msan_print_shadow(x, size)
+#define __msan_print_param_shadow()
+#define __msan_has_dynamic_component() (0)
+#define __msan_allocated_memory(data, size)
+
+#endif   // __has_feature(memory_sanitizer)
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // MSAN_INTERFACE_H
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index b7cd07e..2538a4d 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -6,11 +6,15 @@
   add_subdirectory(asan)
   add_subdirectory(interception)
   add_subdirectory(sanitizer_common)
-  add_subdirectory(ubsan)
+  if(NOT ANDROID)
+    add_subdirectory(ubsan)
+  endif()
 endif()
-if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
-  # ThreadSanitizer is supported on Linux only.
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" AND NOT ANDROID)
+  # ThreadSanitizer and MemorySanitizer are supported on Linux only.
   add_subdirectory(tsan)
+  add_subdirectory(msan)
+  add_subdirectory(msandr)
 endif()
 
 # FIXME: Add support for the profile library.
@@ -149,37 +153,36 @@
   umodti3.c
   )
 
-if(CAN_TARGET_X86_64)
-  add_library(clang_rt.x86_64 STATIC
-    x86_64/floatdidf.c
-    x86_64/floatdisf.c
-    x86_64/floatdixf.c
-    x86_64/floatundidf.S
-    x86_64/floatundisf.S
-    x86_64/floatundixf.S
-    ${GENERIC_SOURCES}
-    )
-  set_target_properties(clang_rt.x86_64 PROPERTIES COMPILE_FLAGS "-std=c99 ${TARGET_X86_64_CFLAGS}")
-  add_clang_compiler_rt_libraries(clang_rt.x86_64)
-endif()
-if(CAN_TARGET_I386)
-  add_library(clang_rt.i386 STATIC
-    i386/ashldi3.S
-    i386/ashrdi3.S
-    i386/divdi3.S
-    i386/floatdidf.S
-    i386/floatdisf.S
-    i386/floatdixf.S
-    i386/floatundidf.S
-    i386/floatundisf.S
-    i386/floatundixf.S
-    i386/lshrdi3.S
-    i386/moddi3.S
-    i386/muldi3.S
-    i386/udivdi3.S
-    i386/umoddi3.S
-    ${GENERIC_SOURCES}
-    )
-  set_target_properties(clang_rt.i386 PROPERTIES COMPILE_FLAGS "-std=c99 ${TARGET_I386_CFLAGS}")
-  add_clang_compiler_rt_libraries(clang_rt.i386)
-endif()
+set(x86_64_SOURCES
+  x86_64/floatdidf.c
+  x86_64/floatdisf.c
+  x86_64/floatdixf.c
+  x86_64/floatundidf.S
+  x86_64/floatundisf.S
+  x86_64/floatundixf.S
+  ${GENERIC_SOURCES})
+
+set(i386_SOURCES
+  i386/ashldi3.S
+  i386/ashrdi3.S
+  i386/divdi3.S
+  i386/floatdidf.S
+  i386/floatdisf.S
+  i386/floatdixf.S
+  i386/floatundidf.S
+  i386/floatundisf.S
+  i386/floatundixf.S
+  i386/lshrdi3.S
+  i386/moddi3.S
+  i386/muldi3.S
+  i386/udivdi3.S
+  i386/umoddi3.S
+  ${GENERIC_SOURCES})
+
+foreach(arch x86_64 i386)
+  if(CAN_TARGET_${arch})
+    add_compiler_rt_static_runtime(clang_rt.${arch} ${arch}
+      SOURCES ${${arch}_SOURCES}
+      CFLAGS "-std=c99")
+  endif()
+endforeach()
diff --git a/lib/Makefile.mk b/lib/Makefile.mk
index ea471e0..3068485 100644
--- a/lib/Makefile.mk
+++ b/lib/Makefile.mk
@@ -19,6 +19,7 @@
 SubDirs += profile
 SubDirs += sanitizer_common
 SubDirs += tsan
+SubDirs += msan
 SubDirs += ubsan
 
 # FIXME: We don't currently support building an atomic library, and as it must
diff --git a/lib/arm/aeabi_dcmp.S b/lib/arm/aeabi_dcmp.S
new file mode 100644
index 0000000..c4d0772
--- /dev/null
+++ b/lib/arm/aeabi_dcmp.S
@@ -0,0 +1,39 @@
+//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// Each macro instantiation below defines one comparison helper:
+//
+//   int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
+//     int result = __{eq,lt,le,ge,gt}df2(a, b);
+//     return (result {==,<,<=,>=,>} 0) ? 1 : 0;
+//   }
+//
+// r4 is never used; presumably it is pushed only to keep sp 8-byte aligned.
+
+#define DEFINE_AEABI_DCMP(cond)                            \
+        .syntax unified                          SEPARATOR \
+        .align 2                                 SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)           \
+        push      { r4, lr }                     SEPARATOR \
+        bl        SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
+        cmp       r0, #0                         SEPARATOR \
+        b ## cond 1f                             SEPARATOR \
+        mov       r0, #0                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+1:                                               SEPARATOR \
+        mov       r0, #1                         SEPARATOR \
+        pop       { r4, pc }
+
+DEFINE_AEABI_DCMP(eq)
+DEFINE_AEABI_DCMP(lt)
+DEFINE_AEABI_DCMP(le)
+DEFINE_AEABI_DCMP(ge)
+DEFINE_AEABI_DCMP(gt)
diff --git a/lib/arm/aeabi_fcmp.S b/lib/arm/aeabi_fcmp.S
new file mode 100644
index 0000000..576a33f
--- /dev/null
+++ b/lib/arm/aeabi_fcmp.S
@@ -0,0 +1,39 @@
+//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// Each macro instantiation below defines one comparison helper:
+//
+//   int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
+//     int result = __{eq,lt,le,ge,gt}sf2(a, b);
+//     return (result {==,<,<=,>=,>} 0) ? 1 : 0;
+//   }
+//
+// r4 is never used; presumably it is pushed only to keep sp 8-byte aligned.
+
+#define DEFINE_AEABI_FCMP(cond)                            \
+        .syntax unified                          SEPARATOR \
+        .align 2                                 SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)           \
+        push      { r4, lr }                     SEPARATOR \
+        bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
+        cmp       r0, #0                         SEPARATOR \
+        b ## cond 1f                             SEPARATOR \
+        mov       r0, #0                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+1:                                               SEPARATOR \
+        mov       r0, #1                         SEPARATOR \
+        pop       { r4, pc }
+
+DEFINE_AEABI_FCMP(eq)
+DEFINE_AEABI_FCMP(lt)
+DEFINE_AEABI_FCMP(le)
+DEFINE_AEABI_FCMP(ge)
+DEFINE_AEABI_FCMP(gt)
diff --git a/lib/asan/CMakeLists.txt b/lib/asan/CMakeLists.txt
index 0f561e6..e451e23 100644
--- a/lib/asan/CMakeLists.txt
+++ b/lib/asan/CMakeLists.txt
@@ -14,6 +14,7 @@
   asan_new_delete.cc
   asan_poisoning.cc
   asan_posix.cc
+  asan_preinit.cc
   asan_report.cc
   asan_rtl.cc
   asan_stack.cc
@@ -25,46 +26,45 @@
 
 set(ASAN_DYLIB_SOURCES
   ${ASAN_SOURCES}
-  dynamic/asan_interceptors_dynamic.cc
   )
 
 include_directories(..)
 
 set(ASAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
 
+set(ASAN_COMMON_DEFINITIONS
+  ASAN_HAS_EXCEPTIONS=1)
+
 if(ANDROID)
-  set(ASAN_COMMON_DEFINITIONS
-    ASAN_HAS_EXCEPTIONS=1
+  list(APPEND ASAN_COMMON_DEFINITIONS
     ASAN_FLEXIBLE_MAPPING_AND_OFFSET=0
     ASAN_NEEDS_SEGV=0
-    ASAN_LOW_MEMORY=1
-    )
+    ASAN_LOW_MEMORY=1)
 else()
-  set(ASAN_COMMON_DEFINITIONS
-    ASAN_HAS_EXCEPTIONS=1
-    ASAN_FLEXIBLE_MAPPING_AND_OFFSET=0
-    ASAN_NEEDS_SEGV=1
-    )
+  list(APPEND ASAN_COMMON_DEFINITIONS
+    ASAN_FLEXIBLE_MAPPING_AND_OFFSET=1
+    ASAN_NEEDS_SEGV=1)
 endif()
 
-set(ASAN_DYLIB_DEFINITIONS
-  ${ASAN_COMMON_DEFINITIONS}
-  MAC_INTERPOSE_FUNCTIONS=1
-  )
+# Architectures supported by ASan.
+filter_available_targets(ASAN_SUPPORTED_ARCH
+  x86_64 i386 powerpc64 powerpc)
 
 set(ASAN_RUNTIME_LIBRARIES)
 if(APPLE)
   # Build universal binary on APPLE.
-  add_library(clang_rt.asan_osx STATIC
-    ${ASAN_SOURCES}
-    $<TARGET_OBJECTS:RTInterception.osx>
-    $<TARGET_OBJECTS:RTSanitizerCommon.osx>
-    )
-  set_target_compile_flags(clang_rt.asan_osx ${ASAN_CFLAGS})
-  filter_available_targets(ASAN_TARGETS x86_64 i386)
-  set_target_properties(clang_rt.asan_osx PROPERTIES
-    OSX_ARCHITECTURES "${ASAN_TARGETS}")
-  list(APPEND ASAN_RUNTIME_LIBRARIES clang_rt.asan_osx)
+  add_compiler_rt_osx_dynamic_runtime(clang_rt.asan_osx_dynamic
+    ARCH ${ASAN_SUPPORTED_ARCH}
+    SOURCES ${ASAN_DYLIB_SOURCES}
+            $<TARGET_OBJECTS:RTInterception.osx>
+            $<TARGET_OBJECTS:RTSanitizerCommon.osx>
+    CFLAGS ${ASAN_CFLAGS}
+    DEFS ${ASAN_COMMON_DEFINITIONS}
+    # Dynamic lookup is needed because shadow scale and offset are
+    # provided by the instrumented modules.
+    LINKFLAGS "-framework Foundation"
+              "-undefined dynamic_lookup")
+  list(APPEND ASAN_RUNTIME_LIBRARIES clang_rt.asan_osx_dynamic)
 elseif(ANDROID)
   add_library(clang_rt.asan-arm-android SHARED
     ${ASAN_SOURCES}
@@ -72,64 +72,26 @@
     $<TARGET_OBJECTS:RTSanitizerCommon.arm.android>
     )
   set_target_compile_flags(clang_rt.asan-arm-android
-    ${ASAN_CFLAGS}
-    )
+    ${ASAN_CFLAGS})
+  set_property(TARGET clang_rt.asan-arm-android APPEND PROPERTY
+    COMPILE_DEFINITIONS ${ASAN_COMMON_DEFINITIONS})
   target_link_libraries(clang_rt.asan-arm-android dl)
   list(APPEND ASAN_RUNTIME_LIBRARIES clang_rt.asan-arm-android)
 else()
   # Otherwise, build separate libraries for each target.
-  if(CAN_TARGET_X86_64)
-    add_library(clang_rt.asan-x86_64 STATIC
-      ${ASAN_SOURCES}
-      $<TARGET_OBJECTS:RTInterception.x86_64>
-      $<TARGET_OBJECTS:RTSanitizerCommon.x86_64>
-      )
-    set_target_compile_flags(clang_rt.asan-x86_64
-      ${ASAN_CFLAGS} ${TARGET_X86_64_CFLAGS}
-      )
-    list(APPEND ASAN_RUNTIME_LIBRARIES clang_rt.asan-x86_64)
-  endif()
-  if(CAN_TARGET_I386)
-    add_library(clang_rt.asan-i386 STATIC
-      ${ASAN_SOURCES}
-      $<TARGET_OBJECTS:RTInterception.i386>
-      $<TARGET_OBJECTS:RTSanitizerCommon.i386>
-      )
-    set_target_compile_flags(clang_rt.asan-i386
-      ${ASAN_CFLAGS} ${TARGET_I386_CFLAGS}
-      )
-    list(APPEND ASAN_RUNTIME_LIBRARIES clang_rt.asan-i386)
-  endif()
+  foreach(arch ${ASAN_SUPPORTED_ARCH})
+    add_compiler_rt_static_runtime(clang_rt.asan-${arch} ${arch}
+      SOURCES ${ASAN_SOURCES}
+              $<TARGET_OBJECTS:RTInterception.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+      CFLAGS ${ASAN_CFLAGS}
+      DEFS ${ASAN_COMMON_DEFINITIONS})
+    list(APPEND ASAN_RUNTIME_LIBRARIES clang_rt.asan-${arch})
+  endforeach()
 endif()
 
-set_property(TARGET ${ASAN_RUNTIME_LIBRARIES} APPEND PROPERTY
-  COMPILE_DEFINITIONS ${ASAN_COMMON_DEFINITIONS})
-add_clang_compiler_rt_libraries(${ASAN_RUNTIME_LIBRARIES})
-
-set(ASAN_DYNAMIC_RUNTIME_LIBRARIES)
-if(APPLE)
-  # Build universal binary on APPLE.
-  add_library(clang_rt.asan_osx_dynamic SHARED
-    ${ASAN_DYLIB_SOURCES}
-    $<TARGET_OBJECTS:RTInterception.osx>
-    $<TARGET_OBJECTS:RTSanitizerCommon.osx>
-    )
-  set_target_compile_flags(clang_rt.asan_osx_dynamic ${ASAN_CFLAGS})
-  filter_available_targets(ASAN_TARGETS x86_64 i386)
-  set_target_properties(clang_rt.asan_osx_dynamic PROPERTIES
-    COMPILE_DEFINITIONS "${ASAN_DYLIB_DEFINITIONS}"
-    OSX_ARCHITECTURES "${ASAN_TARGETS}"
-    LINK_FLAGS "-framework Foundation")
-  list(APPEND ASAN_DYNAMIC_RUNTIME_LIBRARIES clang_rt.asan_osx_dynamic)
-endif()
-add_clang_compiler_rt_libraries(${ASAN_DYNAMIC_RUNTIME_LIBRARIES})
-
-
 if(LLVM_INCLUDE_TESTS)
   add_subdirectory(tests)
 endif()
 
-# ASan output tests.
-# FIXME: move all output tests from output_tests/ to lit_tests/ and get rid
-# of the first directory.
 add_subdirectory(lit_tests)
diff --git a/lib/asan/Makefile.mk b/lib/asan/Makefile.mk
index af9602e..97da64b 100644
--- a/lib/asan/Makefile.mk
+++ b/lib/asan/Makefile.mk
@@ -8,7 +8,7 @@
 #===------------------------------------------------------------------------===#
 
 ModuleName := asan
-SubDirs := dynamic
+SubDirs :=
 
 Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
 ObjNames := $(Sources:%.cc=%.o)
@@ -18,7 +18,6 @@
 # FIXME: use automatic dependencies?
 Dependencies := $(wildcard $(Dir)/*.h)
 Dependencies += $(wildcard $(Dir)/../interception/*.h)
-Dependencies += $(wildcard $(Dir)/../interception/mach_override/*.h)
 Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)
 
 # Define a convenience variable for all the asan functions.
diff --git a/lib/asan/asan_allocator.cc b/lib/asan/asan_allocator.cc
index 4d422a7..47b00bb 100644
--- a/lib/asan/asan_allocator.cc
+++ b/lib/asan/asan_allocator.cc
@@ -29,18 +29,14 @@
 #if ASAN_ALLOCATOR_VERSION == 1
 #include "asan_interceptors.h"
 #include "asan_internal.h"
-#include "asan_lock.h"
 #include "asan_mapping.h"
 #include "asan_stats.h"
 #include "asan_report.h"
 #include "asan_thread.h"
 #include "asan_thread_registry.h"
-#include "sanitizer/asan_interface.h"
+#include "sanitizer_common/sanitizer_allocator.h"
 #include "sanitizer_common/sanitizer_atomic.h"
-
-#if defined(_WIN32) && !defined(__clang__)
-#include <intrin.h>
-#endif
+#include "sanitizer_common/sanitizer_mutex.h"
 
 namespace __asan {
 
@@ -61,11 +57,6 @@
 static const uptr kMaxAllowedMallocSize =
     (SANITIZER_WORDSIZE == 32) ? 3UL << 30 : 8UL << 30;
 
-static inline bool IsAligned(uptr a, uptr alignment) {
-  return (a & (alignment - 1)) == 0;
-}
-
-
 static inline uptr SizeClassToSize(u8 size_class) {
   CHECK(size_class < kNumberOfSizeClasses);
   if (size_class <= kMallocSizeClassStepLog) {
@@ -137,7 +128,8 @@
 
   // Second 8 bytes.
   uptr alignment_log : 8;
-  uptr used_size : FIRST_32_SECOND_64(32, 56);  // Size requested by the user.
+  uptr alloc_type    : 2;
+  uptr used_size : FIRST_32_SECOND_64(32, 54);  // Size requested by the user.
 
   // This field may overlap with the user area and thus should not
   // be used while the chunk is in CHUNK_ALLOCATED state.
@@ -187,33 +179,6 @@
                               chunk_->compressed_free_stack_size());
 }
 
-bool AsanChunkView::AddrIsInside(uptr addr, uptr access_size, uptr *offset) {
-  if (addr >= Beg() && (addr + access_size) <= End()) {
-    *offset = addr - Beg();
-    return true;
-  }
-  return false;
-}
-
-bool AsanChunkView::AddrIsAtLeft(uptr addr, uptr access_size, uptr *offset) {
-  if (addr < Beg()) {
-    *offset = Beg() - addr;
-    return true;
-  }
-  return false;
-}
-
-bool AsanChunkView::AddrIsAtRight(uptr addr, uptr access_size, uptr *offset) {
-  if (addr + access_size >= End()) {
-    if (addr <= End())
-      *offset = 0;
-    else
-      *offset = addr - End();
-    return true;
-  }
-  return false;
-}
-
 static AsanChunk *PtrToChunk(uptr ptr) {
   AsanChunk *m = (AsanChunk*)(ptr - REDZONE);
   if (m->chunk_state == CHUNK_MEMALIGN) {
@@ -224,34 +189,13 @@
 
 void AsanChunkFifoList::PushList(AsanChunkFifoList *q) {
   CHECK(q->size() > 0);
-  if (last_) {
-    CHECK(first_);
-    CHECK(!last_->next);
-    last_->next = q->first_;
-    last_ = q->last_;
-  } else {
-    CHECK(!first_);
-    last_ = q->last_;
-    first_ = q->first_;
-    CHECK(first_);
-  }
-  CHECK(last_);
-  CHECK(!last_->next);
   size_ += q->size();
+  append_back(q);
   q->clear();
 }
 
 void AsanChunkFifoList::Push(AsanChunk *n) {
-  CHECK(n->next == 0);
-  if (last_) {
-    CHECK(first_);
-    CHECK(!last_->next);
-    last_->next = n;
-    last_ = n;
-  } else {
-    CHECK(!first_);
-    last_ = first_ = n;
-  }
+  push_back(n);
   size_ += n->Size();
 }
 
@@ -260,15 +204,9 @@
 // ago. Not sure if we can or want to do anything with this.
 AsanChunk *AsanChunkFifoList::Pop() {
   CHECK(first_);
-  AsanChunk *res = first_;
-  first_ = first_->next;
-  if (first_ == 0)
-    last_ = 0;
-  CHECK(size_ >= res->Size());
+  AsanChunk *res = front();
   size_ -= res->Size();
-  if (last_) {
-    CHECK(!last_->next);
-  }
+  pop_front();
   return res;
 }
 
@@ -291,7 +229,7 @@
     AsanChunk *m = 0;
     AsanChunk **fl = &free_lists_[size_class];
     {
-      ScopedLock lock(&mu_);
+      BlockingMutexLock lock(&mu_);
       for (uptr i = 0; i < n_chunks; i++) {
         if (!(*fl)) {
           *fl = GetNewChunks(size_class);
@@ -309,7 +247,7 @@
   void SwallowThreadLocalMallocStorage(AsanThreadLocalMallocStorage *x,
                                        bool eat_free_lists) {
     CHECK(flags()->quarantine_size > 0);
-    ScopedLock lock(&mu_);
+    BlockingMutexLock lock(&mu_);
     AsanChunkFifoList *q = &x->quarantine_;
     if (q->size() > 0) {
       quarantine_.PushList(q);
@@ -333,18 +271,18 @@
   }
 
   void BypassThreadLocalQuarantine(AsanChunk *chunk) {
-    ScopedLock lock(&mu_);
+    BlockingMutexLock lock(&mu_);
     quarantine_.Push(chunk);
   }
 
   AsanChunk *FindChunkByAddr(uptr addr) {
-    ScopedLock lock(&mu_);
+    BlockingMutexLock lock(&mu_);
     return FindChunkByAddrUnlocked(addr);
   }
 
   uptr AllocationSize(uptr ptr) {
     if (!ptr) return 0;
-    ScopedLock lock(&mu_);
+    BlockingMutexLock lock(&mu_);
 
     // Make sure this is our chunk and |ptr| actually points to the beginning
     // of the allocated memory.
@@ -367,7 +305,7 @@
   }
 
   void PrintStatus() {
-    ScopedLock lock(&mu_);
+    BlockingMutexLock lock(&mu_);
     uptr malloced = 0;
 
     Printf(" MallocInfo: in quarantine: %zu malloced: %zu; ",
@@ -385,7 +323,7 @@
   }
 
   PageGroup *FindPageGroup(uptr addr) {
-    ScopedLock lock(&mu_);
+    BlockingMutexLock lock(&mu_);
     return FindPageGroupUnlocked(addr);
   }
 
@@ -431,7 +369,7 @@
         left_chunk->chunk_state != CHUNK_AVAILABLE)
       return left_chunk;
     // Choose based on offset.
-    uptr l_offset = 0, r_offset = 0;
+    sptr l_offset = 0, r_offset = 0;
     CHECK(AsanChunkView(left_chunk).AddrIsAtRight(addr, 1, &l_offset));
     CHECK(AsanChunkView(right_chunk).AddrIsAtLeft(addr, 1, &r_offset));
     if (l_offset < r_offset)
@@ -451,7 +389,7 @@
     CHECK(m->chunk_state == CHUNK_ALLOCATED ||
           m->chunk_state == CHUNK_AVAILABLE ||
           m->chunk_state == CHUNK_QUARANTINE);
-    uptr offset = 0;
+    sptr offset = 0;
     AsanChunkView m_view(m);
     if (m_view.AddrIsInside(addr, 1, &offset))
       return m;
@@ -543,7 +481,7 @@
 
   AsanChunk *free_lists_[kNumberOfSizeClasses];
   AsanChunkFifoList quarantine_;
-  AsanLock mu_;
+  BlockingMutex mu_;
 
   PageGroup *page_groups_[kMaxAvailableRam / kMinMmapSize];
   atomic_uint32_t n_page_groups_;
@@ -560,7 +498,8 @@
   return AsanChunkView(malloc_info.FindChunkByAddr(address));
 }
 
-static u8 *Allocate(uptr alignment, uptr size, StackTrace *stack) {
+static u8 *Allocate(uptr alignment, uptr size, StackTrace *stack,
+                    AllocType alloc_type) {
   __asan_init();
   CHECK(stack);
   if (size == 0) {
@@ -617,6 +556,7 @@
   CHECK(m);
   CHECK(m->chunk_state == CHUNK_AVAILABLE);
   m->chunk_state = CHUNK_ALLOCATED;
+  m->alloc_type = alloc_type;
   m->next = 0;
   CHECK(m->Size() == size_to_allocate);
   uptr addr = (uptr)m + REDZONE;
@@ -651,7 +591,7 @@
   return (u8*)addr;
 }
 
-static void Deallocate(u8 *ptr, StackTrace *stack) {
+static void Deallocate(u8 *ptr, StackTrace *stack, AllocType alloc_type) {
   if (!ptr) return;
   CHECK(stack);
 
@@ -672,6 +612,9 @@
     ReportFreeNotMalloced((uptr)ptr, stack);
   }
   CHECK(old_chunk_state == CHUNK_ALLOCATED);
+  if (m->alloc_type != alloc_type && flags()->alloc_dealloc_mismatch)
+    ReportAllocTypeMismatch((uptr)ptr, stack,
+                            (AllocType)m->alloc_type, (AllocType)alloc_type);
   // With REDZONE==16 m->next is in the user area, otherwise it should be 0.
   CHECK(REDZONE <= 16 || !m->next);
   CHECK(m->free_tid == kInvalidTid);
@@ -716,11 +659,11 @@
   CHECK(m->chunk_state == CHUNK_ALLOCATED);
   uptr old_size = m->used_size;
   uptr memcpy_size = Min(new_size, old_size);
-  u8 *new_ptr = Allocate(0, new_size, stack);
+  u8 *new_ptr = Allocate(0, new_size, stack, FROM_MALLOC);
   if (new_ptr) {
     CHECK(REAL(memcpy) != 0);
     REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
-    Deallocate(old_ptr, stack);
+    Deallocate(old_ptr, stack, FROM_MALLOC);
   }
   return new_ptr;
 }
@@ -744,28 +687,35 @@
 
 namespace __asan {
 
+void InitializeAllocator() { }
+
+void PrintInternalAllocatorStats() {
+}
+
 SANITIZER_INTERFACE_ATTRIBUTE
-void *asan_memalign(uptr alignment, uptr size, StackTrace *stack) {
-  void *ptr = (void*)Allocate(alignment, size, stack);
+void *asan_memalign(uptr alignment, uptr size, StackTrace *stack,
+                    AllocType alloc_type) {
+  void *ptr = (void*)Allocate(alignment, size, stack, alloc_type);
   ASAN_MALLOC_HOOK(ptr, size);
   return ptr;
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void asan_free(void *ptr, StackTrace *stack) {
+void asan_free(void *ptr, StackTrace *stack, AllocType alloc_type) {
   ASAN_FREE_HOOK(ptr);
-  Deallocate((u8*)ptr, stack);
+  Deallocate((u8*)ptr, stack, alloc_type);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void *asan_malloc(uptr size, StackTrace *stack) {
-  void *ptr = (void*)Allocate(0, size, stack);
+  void *ptr = (void*)Allocate(0, size, stack, FROM_MALLOC);
   ASAN_MALLOC_HOOK(ptr, size);
   return ptr;
 }
 
 void *asan_calloc(uptr nmemb, uptr size, StackTrace *stack) {
-  void *ptr = (void*)Allocate(0, nmemb * size, stack);
+  if (__sanitizer::CallocShouldReturnNullDueToOverflow(size, nmemb)) return 0;
+  void *ptr = (void*)Allocate(0, nmemb * size, stack, FROM_MALLOC);
   if (ptr)
     REAL(memset)(ptr, 0, nmemb * size);
   ASAN_MALLOC_HOOK(ptr, size);
@@ -774,19 +724,19 @@
 
 void *asan_realloc(void *p, uptr size, StackTrace *stack) {
   if (p == 0) {
-    void *ptr = (void*)Allocate(0, size, stack);
+    void *ptr = (void*)Allocate(0, size, stack, FROM_MALLOC);
     ASAN_MALLOC_HOOK(ptr, size);
     return ptr;
   } else if (size == 0) {
     ASAN_FREE_HOOK(p);
-    Deallocate((u8*)p, stack);
+    Deallocate((u8*)p, stack, FROM_MALLOC);
     return 0;
   }
   return Reallocate((u8*)p, size, stack);
 }
 
 void *asan_valloc(uptr size, StackTrace *stack) {
-  void *ptr = (void*)Allocate(GetPageSizeCached(), size, stack);
+  void *ptr = (void*)Allocate(GetPageSizeCached(), size, stack, FROM_MALLOC);
   ASAN_MALLOC_HOOK(ptr, size);
   return ptr;
 }
@@ -798,14 +748,14 @@
     // pvalloc(0) should allocate one page.
     size = PageSize;
   }
-  void *ptr = (void*)Allocate(PageSize, size, stack);
+  void *ptr = (void*)Allocate(PageSize, size, stack, FROM_MALLOC);
   ASAN_MALLOC_HOOK(ptr, size);
   return ptr;
 }
 
 int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
                           StackTrace *stack) {
-  void *ptr = Allocate(alignment, size, stack);
+  void *ptr = Allocate(alignment, size, stack, FROM_MALLOC);
   CHECK(IsAligned((uptr)ptr, alignment));
   ASAN_MALLOC_HOOK(ptr, size);
   *memptr = ptr;
@@ -855,7 +805,7 @@
   uptr allocated_size = malloc_info.AllocationSize((uptr)p);
   // Die if p is not malloced or if it is already freed.
   if (allocated_size == 0) {
-    GET_STACK_TRACE_HERE(kStackTraceMax);
+    GET_STACK_TRACE_FATAL_HERE;
     ReportAsanGetAllocatedSizeNotOwned((uptr)p, &stack);
   }
   return allocated_size;
diff --git a/lib/asan/asan_allocator.h b/lib/asan/asan_allocator.h
index 1e936c3..9ba2542 100644
--- a/lib/asan/asan_allocator.h
+++ b/lib/asan/asan_allocator.h
@@ -17,18 +17,33 @@
 
 #include "asan_internal.h"
 #include "asan_interceptors.h"
+#include "sanitizer_common/sanitizer_list.h"
 
 // We are in the process of transitioning from the old allocator (version 1)
 // to a new one (version 2). The change is quite intrusive so both allocators
 // will co-exist in the source base for a while. The actual allocator is chosen
-// at build time by redefining this macrozz.
-#define ASAN_ALLOCATOR_VERSION 1
+// at build time by redefining this macro.
+#ifndef ASAN_ALLOCATOR_VERSION
+# if (ASAN_LINUX && !ASAN_ANDROID) || ASAN_MAC || ASAN_WINDOWS
+#  define ASAN_ALLOCATOR_VERSION 2
+# else
+#  define ASAN_ALLOCATOR_VERSION 1
+# endif
+#endif  // ASAN_ALLOCATOR_VERSION
 
 namespace __asan {
 
+enum AllocType {
+  FROM_MALLOC = 1,  // Memory block came from malloc, calloc, realloc, etc.
+  FROM_NEW = 2,     // Memory block came from operator new.
+  FROM_NEW_BR = 3   // Memory block came from operator new[].
+};
+
 static const uptr kNumberOfSizeClasses = 255;
 struct AsanChunk;
 
+void InitializeAllocator();
+
 class AsanChunkView {
  public:
   explicit AsanChunkView(AsanChunk *chunk) : chunk_(chunk) {}
@@ -40,16 +55,37 @@
   uptr FreeTid();
   void GetAllocStack(StackTrace *stack);
   void GetFreeStack(StackTrace *stack);
-  bool AddrIsInside(uptr addr, uptr access_size, uptr *offset);
-  bool AddrIsAtLeft(uptr addr, uptr access_size, uptr *offset);
-  bool AddrIsAtRight(uptr addr, uptr access_size, uptr *offset);
+  bool AddrIsInside(uptr addr, uptr access_size, sptr *offset) {
+    if (addr >= Beg() && (addr + access_size) <= End()) {
+      *offset = addr - Beg();
+      return true;
+    }
+    return false;
+  }
+  bool AddrIsAtLeft(uptr addr, uptr access_size, sptr *offset) {
+    (void)access_size;
+    if (addr < Beg()) {
+      *offset = Beg() - addr;
+      return true;
+    }
+    return false;
+  }
+  bool AddrIsAtRight(uptr addr, uptr access_size, sptr *offset) {
+    if (addr + access_size > End()) {
+      *offset = addr - End();
+      return true;
+    }
+    return false;
+  }
+
  private:
   AsanChunk *const chunk_;
 };
 
 AsanChunkView FindHeapChunkByAddress(uptr address);
 
-class AsanChunkFifoList {
+// List of AsanChunks with total size.
+class AsanChunkFifoList: public IntrusiveList<AsanChunk> {
  public:
   explicit AsanChunkFifoList(LinkerInitialized) { }
   AsanChunkFifoList() { clear(); }
@@ -58,25 +94,31 @@
   AsanChunk *Pop();
   uptr size() { return size_; }
   void clear() {
-    first_ = last_ = 0;
+    IntrusiveList<AsanChunk>::clear();
     size_ = 0;
   }
  private:
-  AsanChunk *first_;
-  AsanChunk *last_;
   uptr size_;
 };
 
 struct AsanThreadLocalMallocStorage {
   explicit AsanThreadLocalMallocStorage(LinkerInitialized x)
-      : quarantine_(x) { }
+#if ASAN_ALLOCATOR_VERSION == 1
+      : quarantine_(x)
+#endif
+      { }
   AsanThreadLocalMallocStorage() {
     CHECK(REAL(memset));
     REAL(memset)(this, 0, sizeof(AsanThreadLocalMallocStorage));
   }
 
+#if ASAN_ALLOCATOR_VERSION == 1
   AsanChunkFifoList quarantine_;
   AsanChunk *free_lists_[kNumberOfSizeClasses];
+#else
+  uptr quarantine_cache[16];
+  uptr allocator2_cache[96 * (512 * 8 + 16)];  // Opaque.
+#endif
   void CommitBack();
 };
 
@@ -164,8 +206,9 @@
   FakeFrameLifo call_stack_;
 };
 
-void *asan_memalign(uptr alignment, uptr size, StackTrace *stack);
-void asan_free(void *ptr, StackTrace *stack);
+void *asan_memalign(uptr alignment, uptr size, StackTrace *stack,
+                    AllocType alloc_type);
+void asan_free(void *ptr, StackTrace *stack, AllocType alloc_type);
 
 void *asan_malloc(uptr size, StackTrace *stack);
 void *asan_calloc(uptr nmemb, uptr size, StackTrace *stack);
@@ -181,40 +224,7 @@
 void asan_mz_force_lock();
 void asan_mz_force_unlock();
 
-// Log2 and RoundUpToPowerOfTwo should be inlined for performance.
-
-static inline uptr Log2(uptr x) {
-  CHECK(IsPowerOfTwo(x));
-#if !defined(_WIN32) || defined(__clang__)
-  return __builtin_ctzl(x);
-#elif defined(_WIN64)
-  unsigned long ret;  // NOLINT
-  _BitScanForward64(&ret, x);
-  return ret;
-#else
-  unsigned long ret;  // NOLINT
-  _BitScanForward(&ret, x);
-  return ret;
-#endif
-}
-
-static inline uptr RoundUpToPowerOfTwo(uptr size) {
-  CHECK(size);
-  if (IsPowerOfTwo(size)) return size;
-
-  unsigned long up;  // NOLINT
-#if !defined(_WIN32) || defined(__clang__)
-  up = SANITIZER_WORDSIZE - 1 - __builtin_clzl(size);
-#elif defined(_WIN64)
-  _BitScanReverse64(&up, size);
-#else
-  _BitScanReverse(&up, size);
-#endif
-  CHECK(size < (1ULL << (up + 1)));
-  CHECK(size > (1ULL << up));
-  return 1UL << (up + 1);
-}
-
+void PrintInternalAllocatorStats();
 
 }  // namespace __asan
 #endif  // ASAN_ALLOCATOR_H
diff --git a/lib/asan/asan_allocator2.cc b/lib/asan/asan_allocator2.cc
index bb3a15e..3288f28 100644
--- a/lib/asan/asan_allocator2.cc
+++ b/lib/asan/asan_allocator2.cc
@@ -18,26 +18,696 @@
 #include "asan_allocator.h"
 #if ASAN_ALLOCATOR_VERSION == 2
 
+#include "asan_mapping.h"
+#include "asan_report.h"
+#include "asan_thread.h"
+#include "asan_thread_registry.h"
 #include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_list.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_quarantine.h"
 
 namespace __asan {
 
+struct AsanMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const {
+    PoisonShadow(p, size, kAsanHeapLeftRedzoneMagic);
+    // Statistics.
+    AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+    thread_stats.mmaps++;
+    thread_stats.mmaped += size;
+  }
+  void OnUnmap(uptr p, uptr size) const {
+    PoisonShadow(p, size, 0);
+    // We are about to unmap a chunk of user memory.
+    // Mark the corresponding shadow memory as not needed.
+    // Since asan's mapping is compacting, the shadow chunk may be
+    // not page-aligned, so we only flush the page-aligned portion.
+    uptr page_size = GetPageSizeCached();
+    uptr shadow_beg = RoundUpTo(MemToShadow(p), page_size);
+    uptr shadow_end = RoundDownTo(MemToShadow(p + size), page_size);
+    FlushUnneededShadowMemory(shadow_beg, shadow_end - shadow_beg);
+    // Statistics.
+    AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+    thread_stats.munmaps++;
+    thread_stats.munmaped += size;
+  }
+};
+
 #if SANITIZER_WORDSIZE == 64
+#if defined(__powerpc64__)
+const uptr kAllocatorSpace =  0xa0000000000ULL;
+#else
 const uptr kAllocatorSpace = 0x600000000000ULL;
-const uptr kAllocatorSize  =  0x10000000000ULL;  // 1T.
+#endif
+const uptr kAllocatorSize  =  0x40000000000ULL;  // 4T.
+typedef DefaultSizeClassMap SizeClassMap;
 typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 0 /*metadata*/,
-    DefaultSizeClassMap> PrimaryAllocator;
+    SizeClassMap, AsanMapUnmapCallback> PrimaryAllocator;
 #elif SANITIZER_WORDSIZE == 32
 static const u64 kAddressSpaceSize = 1ULL << 32;
-typedef SizeClassAllocator32<
-  0, kAddressSpaceSize, 16, CompactSizeClassMap> PrimaryAllocator;
+typedef CompactSizeClassMap SizeClassMap;
+typedef SizeClassAllocator32<0, kAddressSpaceSize, 16,
+  SizeClassMap, AsanMapUnmapCallback> PrimaryAllocator;
 #endif
 
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
-typedef LargeMmapAllocator SecondaryAllocator;
+typedef LargeMmapAllocator<AsanMapUnmapCallback> SecondaryAllocator;
 typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
     SecondaryAllocator> Allocator;
 
+// We can not use THREADLOCAL because it is not supported on some of the
+// platforms we care about (OSX 10.6, Android).
+// static THREADLOCAL AllocatorCache cache;
+AllocatorCache *GetAllocatorCache(AsanThreadLocalMallocStorage *ms) {
+  CHECK(ms);
+  CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator2_cache));
+  return reinterpret_cast<AllocatorCache *>(ms->allocator2_cache);
+}
+
+static Allocator allocator;
+
+static const uptr kMaxAllowedMallocSize =
+  FIRST_32_SECOND_64(3UL << 30, 8UL << 30);
+
+static const uptr kMaxThreadLocalQuarantine =
+  FIRST_32_SECOND_64(1 << 18, 1 << 20);
+
+// Every chunk of memory allocated by this allocator can be in one of 3 states:
+// CHUNK_AVAILABLE: the chunk is in the free list and ready to be allocated.
+// CHUNK_ALLOCATED: the chunk is allocated and not yet freed.
+// CHUNK_QUARANTINE: the chunk was freed and put into quarantine zone.
+enum {
+  CHUNK_AVAILABLE  = 0,  // 0 is the default value even if we didn't set it.
+  CHUNK_ALLOCATED  = 2,
+  CHUNK_QUARANTINE = 3
+};
+
+// Valid redzone sizes are 16, 32, 64, ... 2048, so we encode them in 3 bits.
+// We use adaptive redzones: for larger allocation larger redzones are used.
+static u32 RZLog2Size(u32 rz_log) {
+  CHECK_LT(rz_log, 8);
+  return 16 << rz_log;
+}
+
+static u32 RZSize2Log(u32 rz_size) {
+  CHECK_GE(rz_size, 16);
+  CHECK_LE(rz_size, 2048);
+  CHECK(IsPowerOfTwo(rz_size));
+  u32 res = Log2(rz_size) - 4;
+  CHECK_EQ(rz_size, RZLog2Size(res));
+  return res;
+}
+
+static uptr ComputeRZLog(uptr user_requested_size) {
+  u32 rz_log =
+    user_requested_size <= 64        - 16   ? 0 :
+    user_requested_size <= 128       - 32   ? 1 :
+    user_requested_size <= 512       - 64   ? 2 :
+    user_requested_size <= 4096      - 128  ? 3 :
+    user_requested_size <= (1 << 14) - 256  ? 4 :
+    user_requested_size <= (1 << 15) - 512  ? 5 :
+    user_requested_size <= (1 << 16) - 1024 ? 6 : 7;
+  return Max(rz_log, RZSize2Log(flags()->redzone));
+}
+
+// The memory chunk allocated from the underlying allocator looks like this:
+// L L L L L L H H U U U U U U R R
+//   L -- left redzone words (0 or more bytes)
+//   H -- ChunkHeader (16 bytes), which is also a part of the left redzone.
+//   U -- user memory.
+//   R -- right redzone (0 or more bytes)
+// ChunkBase consists of ChunkHeader and other bytes that overlap with user
+// memory.
+
+// If a memory chunk is allocated by memalign and we had to increase the
+// allocation size to achieve the proper alignment, then we store this magic
+// value in the first uptr word of the memory block and store the address of
+// ChunkBase in the next uptr.
+// M B ? ? ? L L L L L L  H H U U U U U U
+//   M -- magic value kMemalignMagic
+//   B -- address of ChunkHeader pointing to the first 'H'
+static const uptr kMemalignMagic = 0xCC6E96B9;
+
+struct ChunkHeader {
+  // 1-st 8 bytes.
+  u32 chunk_state       : 8;  // Must be first.
+  u32 alloc_tid         : 24;
+
+  u32 free_tid          : 24;
+  u32 from_memalign     : 1;
+  u32 alloc_type        : 2;
+  u32 rz_log            : 3;
+  // 2-nd 8 bytes
+  // This field is used for small sizes. For large sizes it is equal to
+  // SizeClassMap::kMaxSize and the actual size is stored in the
+  // SecondaryAllocator's metadata.
+  u32 user_requested_size;
+  u32 alloc_context_id;
+};
+
+struct ChunkBase : ChunkHeader {
+  // Header2, intersects with user memory.
+  AsanChunk *next;
+  u32 free_context_id;
+};
+
+static const uptr kChunkHeaderSize = sizeof(ChunkHeader);
+static const uptr kChunkHeader2Size = sizeof(ChunkBase) - kChunkHeaderSize;
+COMPILER_CHECK(kChunkHeaderSize == 16);
+COMPILER_CHECK(kChunkHeader2Size <= 16);
+
+struct AsanChunk: ChunkBase {
+  uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
+  uptr UsedSize() {
+    if (user_requested_size != SizeClassMap::kMaxSize)
+      return user_requested_size;
+    return *reinterpret_cast<uptr *>(allocator.GetMetaData(AllocBeg()));
+  }
+  void *AllocBeg() {
+    if (from_memalign)
+      return allocator.GetBlockBegin(reinterpret_cast<void *>(this));
+    return reinterpret_cast<void*>(Beg() - RZLog2Size(rz_log));
+  }
+  // We store the alloc/free stack traces in the chunk itself.
+  u32 *AllocStackBeg() {
+    return (u32*)(Beg() - RZLog2Size(rz_log));
+  }
+  uptr AllocStackSize() {  // In u32 slots: redzone minus the chunk header.
+    CHECK_GE(RZLog2Size(rz_log), kChunkHeaderSize);  // Else '-' underflows.
+    return (RZLog2Size(rz_log) - kChunkHeaderSize) / sizeof(u32);
+  }
+  u32 *FreeStackBeg() {
+    return (u32*)(Beg() + kChunkHeader2Size);
+  }
+  uptr FreeStackSize() {
+    if (user_requested_size < kChunkHeader2Size) return 0;
+    uptr available = RoundUpTo(user_requested_size, SHADOW_GRANULARITY);
+    return (available - kChunkHeader2Size) / sizeof(u32);
+  }
+};
+
+uptr AsanChunkView::Beg() { return chunk_->Beg(); }
+uptr AsanChunkView::End() { return Beg() + UsedSize(); }
+uptr AsanChunkView::UsedSize() { return chunk_->UsedSize(); }
+uptr AsanChunkView::AllocTid() { return chunk_->alloc_tid; }
+uptr AsanChunkView::FreeTid() { return chunk_->free_tid; }
+
+static void GetStackTraceFromId(u32 id, StackTrace *stack) {
+  CHECK(id);
+  uptr size = 0;
+  const uptr *trace = StackDepotGet(id, &size);
+  CHECK_LT(size, kStackTraceMax);
+  internal_memcpy(stack->trace, trace, sizeof(uptr) * size);
+  stack->size = size;
+}
+
+void AsanChunkView::GetAllocStack(StackTrace *stack) {
+  if (flags()->use_stack_depot)
+    GetStackTraceFromId(chunk_->alloc_context_id, stack);
+  else
+    StackTrace::UncompressStack(stack, chunk_->AllocStackBeg(),
+                                chunk_->AllocStackSize());
+}
+
+void AsanChunkView::GetFreeStack(StackTrace *stack) {
+  if (flags()->use_stack_depot)
+    GetStackTraceFromId(chunk_->free_context_id, stack);
+  else
+    StackTrace::UncompressStack(stack, chunk_->FreeStackBeg(),
+                                chunk_->FreeStackSize());
+}
+
+struct QuarantineCallback;
+typedef Quarantine<QuarantineCallback, AsanChunk> AsanQuarantine;
+typedef AsanQuarantine::Cache QuarantineCache;
+static AsanQuarantine quarantine(LINKER_INITIALIZED);
+static QuarantineCache fallback_quarantine_cache(LINKER_INITIALIZED);
+static AllocatorCache fallback_allocator_cache;
+static SpinMutex fallback_mutex;
+
+QuarantineCache *GetQuarantineCache(AsanThreadLocalMallocStorage *ms) {
+  CHECK(ms);
+  CHECK_LE(sizeof(QuarantineCache), sizeof(ms->quarantine_cache));
+  return reinterpret_cast<QuarantineCache *>(ms->quarantine_cache);
+}
+
+struct QuarantineCallback {
+  explicit QuarantineCallback(AllocatorCache *cache)
+      : cache_(cache) {
+  }
+
+  void Recycle(AsanChunk *m) {
+    CHECK(m->chunk_state == CHUNK_QUARANTINE);
+    m->chunk_state = CHUNK_AVAILABLE;
+    CHECK_NE(m->alloc_tid, kInvalidTid);
+    CHECK_NE(m->free_tid, kInvalidTid);
+    PoisonShadow(m->Beg(),
+                 RoundUpTo(m->UsedSize(), SHADOW_GRANULARITY),
+                 kAsanHeapLeftRedzoneMagic);
+    void *p = reinterpret_cast<void *>(m->AllocBeg());
+    if (m->from_memalign) {
+      uptr *memalign_magic = reinterpret_cast<uptr *>(p);
+      CHECK_EQ(memalign_magic[0], kMemalignMagic);
+      CHECK_EQ(memalign_magic[1], reinterpret_cast<uptr>(m));
+    }
+
+    // Statistics.
+    AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+    thread_stats.real_frees++;
+    thread_stats.really_freed += m->UsedSize();
+
+    allocator.Deallocate(cache_, p);
+  }
+
+  void *Allocate(uptr size) {
+    return allocator.Allocate(cache_, size, 1, false);
+  }
+
+  void Deallocate(void *p) {
+    allocator.Deallocate(cache_, p);
+  }
+
+  AllocatorCache *cache_;
+};
+
+void InitializeAllocator() {
+  allocator.Init();
+  quarantine.Init((uptr)flags()->quarantine_size, kMaxThreadLocalQuarantine);
+}
+
+static void *Allocate(uptr size, uptr alignment, StackTrace *stack,
+                      AllocType alloc_type) {
+  if (!asan_inited)
+    __asan_init();
+  CHECK(stack);
+  const uptr min_alignment = SHADOW_GRANULARITY;
+  if (alignment < min_alignment)
+    alignment = min_alignment;
+  if (size == 0) {
+    // We'd be happy to avoid allocating memory for zero-size requests, but
+    // some programs/tests depend on this behavior and assume that malloc would
+    // not return NULL even for zero-size allocations. Moreover, it looks like
+    // operator new should never return NULL, and results of consecutive "new"
+    // calls must be different even if the allocated size is zero.
+    size = 1;
+  }
+  CHECK(IsPowerOfTwo(alignment));
+  uptr rz_log = ComputeRZLog(size);
+  uptr rz_size = RZLog2Size(rz_log);
+  uptr rounded_size = RoundUpTo(size, alignment);
+  if (rounded_size < kChunkHeader2Size)
+    rounded_size = kChunkHeader2Size;
+  uptr needed_size = rounded_size + rz_size;
+  if (alignment > min_alignment)
+    needed_size += alignment;
+  bool using_primary_allocator = true;
+  // If we are allocating from the secondary allocator, there will be no
+  // automatic right redzone, so add the right redzone manually.
+  if (!PrimaryAllocator::CanAllocate(needed_size, alignment)) {
+    needed_size += rz_size;
+    using_primary_allocator = false;
+  }
+  CHECK(IsAligned(needed_size, min_alignment));
+  if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize) {
+    Report("WARNING: AddressSanitizer failed to allocate %p bytes\n",
+           (void*)size);
+    return 0;
+  }
+
+  AsanThread *t = asanThreadRegistry().GetCurrent();
+  void *allocated;
+  if (t) {
+    AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
+    allocated = allocator.Allocate(cache, needed_size, 8, false);
+  } else {
+    SpinMutexLock l(&fallback_mutex);
+    AllocatorCache *cache = &fallback_allocator_cache;
+    allocated = allocator.Allocate(cache, needed_size, 8, false);
+  }
+  uptr alloc_beg = reinterpret_cast<uptr>(allocated);
+  // Clear the first allocated word (an old kMemalignMagic may still be there).
+  reinterpret_cast<uptr *>(alloc_beg)[0] = 0;
+  uptr alloc_end = alloc_beg + needed_size;
+  uptr beg_plus_redzone = alloc_beg + rz_size;
+  uptr user_beg = beg_plus_redzone;
+  if (!IsAligned(user_beg, alignment))
+    user_beg = RoundUpTo(user_beg, alignment);
+  uptr user_end = user_beg + size;
+  CHECK_LE(user_end, alloc_end);
+  uptr chunk_beg = user_beg - kChunkHeaderSize;
+  AsanChunk *m = reinterpret_cast<AsanChunk *>(chunk_beg);
+  m->chunk_state = CHUNK_ALLOCATED;
+  m->alloc_type = alloc_type;
+  m->rz_log = rz_log;
+  u32 alloc_tid = t ? t->tid() : 0;
+  m->alloc_tid = alloc_tid;
+  CHECK_EQ(alloc_tid, m->alloc_tid);  // Does alloc_tid fit into the bitfield?
+  m->free_tid = kInvalidTid;
+  m->from_memalign = user_beg != beg_plus_redzone;
+  if (m->from_memalign) {
+    CHECK_LE(beg_plus_redzone + 2 * sizeof(uptr), user_beg);
+    uptr *memalign_magic = reinterpret_cast<uptr *>(alloc_beg);
+    memalign_magic[0] = kMemalignMagic;
+    memalign_magic[1] = chunk_beg;
+  }
+  if (using_primary_allocator) {
+    CHECK(size);
+    m->user_requested_size = size;
+    CHECK(allocator.FromPrimary(allocated));
+  } else {
+    CHECK(!allocator.FromPrimary(allocated));
+    m->user_requested_size = SizeClassMap::kMaxSize;
+    uptr *meta = reinterpret_cast<uptr *>(allocator.GetMetaData(allocated));
+    meta[0] = size;
+    meta[1] = chunk_beg;
+  }
+
+  if (flags()->use_stack_depot) {
+    m->alloc_context_id = StackDepotPut(stack->trace, stack->size);
+  } else {
+    m->alloc_context_id = 0;
+    StackTrace::CompressStack(stack, m->AllocStackBeg(), m->AllocStackSize());
+  }
+
+  uptr size_rounded_down_to_granularity = RoundDownTo(size, SHADOW_GRANULARITY);
+  // Unpoison the bulk of the memory region.
+  if (size_rounded_down_to_granularity)
+    PoisonShadow(user_beg, size_rounded_down_to_granularity, 0);
+  // Deal with the end of the region if size is not aligned to granularity.
+  if (size != size_rounded_down_to_granularity && flags()->poison_heap) {
+    u8 *shadow = (u8*)MemToShadow(user_beg + size_rounded_down_to_granularity);
+    *shadow = size & (SHADOW_GRANULARITY - 1);
+  }
+
+  AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+  thread_stats.mallocs++;
+  thread_stats.malloced += size;
+  thread_stats.malloced_redzones += needed_size - size;
+  uptr class_id = Min(kNumberOfSizeClasses, SizeClassMap::ClassID(needed_size));
+  thread_stats.malloced_by_size[class_id]++;
+  if (needed_size > SizeClassMap::kMaxSize)
+    thread_stats.malloc_large++;
+
+  void *res = reinterpret_cast<void *>(user_beg);
+  ASAN_MALLOC_HOOK(res, size);
+  return res;
+}
+
+static void Deallocate(void *ptr, StackTrace *stack, AllocType alloc_type) {
+  uptr p = reinterpret_cast<uptr>(ptr);
+  if (p == 0) return;
+  ASAN_FREE_HOOK(ptr);
+  uptr chunk_beg = p - kChunkHeaderSize;
+  AsanChunk *m = reinterpret_cast<AsanChunk *>(chunk_beg);
+
+  // Flip the chunk_state atomically to avoid race on double-free.
+  u8 old_chunk_state = atomic_exchange((atomic_uint8_t*)m, CHUNK_QUARANTINE,
+                                       memory_order_relaxed);
+
+  if (old_chunk_state == CHUNK_QUARANTINE)
+    ReportDoubleFree((uptr)ptr, stack);
+  else if (old_chunk_state != CHUNK_ALLOCATED)
+    ReportFreeNotMalloced((uptr)ptr, stack);
+  CHECK(old_chunk_state == CHUNK_ALLOCATED);
+  if (m->alloc_type != alloc_type && flags()->alloc_dealloc_mismatch)
+    ReportAllocTypeMismatch((uptr)ptr, stack,
+                            (AllocType)m->alloc_type, (AllocType)alloc_type);
+
+  CHECK_GE(m->alloc_tid, 0);
+  if (SANITIZER_WORDSIZE == 64)  // On 32-bits this resides in user area.
+    CHECK_EQ(m->free_tid, kInvalidTid);
+  AsanThread *t = asanThreadRegistry().GetCurrent();
+  m->free_tid = t ? t->tid() : 0;
+  if (flags()->use_stack_depot) {
+    m->free_context_id = StackDepotPut(stack->trace, stack->size);
+  } else {
+    m->free_context_id = 0;
+    StackTrace::CompressStack(stack, m->FreeStackBeg(), m->FreeStackSize());
+  }
+  CHECK(m->chunk_state == CHUNK_QUARANTINE);
+  // Poison the region.
+  PoisonShadow(m->Beg(),
+               RoundUpTo(m->UsedSize(), SHADOW_GRANULARITY),
+               kAsanHeapFreeMagic);
+
+  AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+  thread_stats.frees++;
+  thread_stats.freed += m->UsedSize();
+
+  // Push into quarantine.
+  if (t) {
+    AsanThreadLocalMallocStorage *ms = &t->malloc_storage();
+    AllocatorCache *ac = GetAllocatorCache(ms);
+    quarantine.Put(GetQuarantineCache(ms), QuarantineCallback(ac),
+                   m, m->UsedSize());
+  } else {
+    SpinMutexLock l(&fallback_mutex);
+    AllocatorCache *ac = &fallback_allocator_cache;
+    quarantine.Put(&fallback_quarantine_cache, QuarantineCallback(ac),
+                   m, m->UsedSize());
+  }
+}
+
+static void *Reallocate(void *old_ptr, uptr new_size, StackTrace *stack) {
+  CHECK(old_ptr && new_size);
+  uptr p = reinterpret_cast<uptr>(old_ptr);
+  uptr chunk_beg = p - kChunkHeaderSize;
+  AsanChunk *m = reinterpret_cast<AsanChunk *>(chunk_beg);
+
+  AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+  thread_stats.reallocs++;
+  thread_stats.realloced += new_size;
+
+  CHECK(m->chunk_state == CHUNK_ALLOCATED);
+  uptr old_size = m->UsedSize();
+  uptr memcpy_size = Min(new_size, old_size);
+  void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC);
+  if (new_ptr) {
+    CHECK_NE(REAL(memcpy), (void*)0);
+    REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
+    Deallocate(old_ptr, stack, FROM_MALLOC);
+  }
+  return new_ptr;
+}
+
+static AsanChunk *GetAsanChunkByAddr(uptr p) {
+  void *ptr = reinterpret_cast<void *>(p);
+  uptr alloc_beg = reinterpret_cast<uptr>(allocator.GetBlockBegin(ptr));
+  if (!alloc_beg) return 0;  // GetBlockBegin() found nothing for p.
+  uptr *memalign_magic = reinterpret_cast<uptr *>(alloc_beg);
+  if (memalign_magic[0] == kMemalignMagic) {  // Written by Allocate().
+    AsanChunk *m = reinterpret_cast<AsanChunk *>(memalign_magic[1]);
+    CHECK(m->from_memalign);
+    return m;
+  }
+  if (!allocator.FromPrimary(ptr)) {  // Allocate() put chunk_beg in meta[1].
+    uptr *meta = reinterpret_cast<uptr *>(
+        allocator.GetMetaData(reinterpret_cast<void *>(alloc_beg)));
+    AsanChunk *m = reinterpret_cast<AsanChunk *>(meta[1]);
+    return m;
+  }
+  uptr actual_size = allocator.GetActuallyAllocatedSize(ptr);
+  CHECK_LE(actual_size, SizeClassMap::kMaxSize);
+  // We know the actually allocated size, but we don't know the redzone size.
+  // Try each redzone size and keep the one ComputeRZLog() agrees with.
+  for (u32 rz_log = 0; rz_log < 8; rz_log++) {
+    u32 rz_size = RZLog2Size(rz_log);
+    uptr max_possible_size = actual_size - rz_size;
+    if (ComputeRZLog(max_possible_size) != rz_log)
+      continue;
+    return reinterpret_cast<AsanChunk *>(
+        alloc_beg + rz_size - kChunkHeaderSize);
+  }
+  return 0;  // No redzone size was consistent with the block size.
+}
+
+static uptr AllocationSize(uptr p) {
+  // Size of the live chunk whose user region starts exactly at p, else 0.
+  AsanChunk *chunk = GetAsanChunkByAddr(p);
+  const bool owned =
+      chunk && chunk->chunk_state == CHUNK_ALLOCATED && chunk->Beg() == p;
+  return owned ? chunk->UsedSize() : 0;
+}
+
+// We have an address between two chunks, and we want to report just one.
+AsanChunk *ChooseChunk(uptr addr,
+                       AsanChunk *left_chunk, AsanChunk *right_chunk) {
+  // Prefer an allocated chunk over freed chunk and freed chunk
+  // over available chunk.
+  if (left_chunk->chunk_state != right_chunk->chunk_state) {
+    if (left_chunk->chunk_state == CHUNK_ALLOCATED)
+      return left_chunk;
+    if (right_chunk->chunk_state == CHUNK_ALLOCATED)
+      return right_chunk;
+    if (left_chunk->chunk_state == CHUNK_QUARANTINE)
+      return left_chunk;
+    if (right_chunk->chunk_state == CHUNK_QUARANTINE)
+      return right_chunk;
+  }
+  // Same chunk_state: choose based on offset.
+  sptr l_offset = 0, r_offset = 0;
+  CHECK(AsanChunkView(left_chunk).AddrIsAtRight(addr, 1, &l_offset));
+  CHECK(AsanChunkView(right_chunk).AddrIsAtLeft(addr, 1, &r_offset));
+  if (l_offset < r_offset)
+    return left_chunk;
+  return right_chunk;
+}
+
+AsanChunkView FindHeapChunkByAddress(uptr addr) {
+  AsanChunk *m1 = GetAsanChunkByAddr(addr);
+  if (!m1) return AsanChunkView(m1);
+  sptr offset = 0;
+  if (AsanChunkView(m1).AddrIsAtLeft(addr, 1, &offset)) {
+    // The address is in the chunk's left redzone, so maybe it is actually
+    // a right buffer overflow from the other chunk to the left.
+    // Search a bit to the left to see if there is another chunk.
+    AsanChunk *m2 = 0;
+    for (uptr l = 1; l < GetPageSizeCached(); l++) {
+      m2 = GetAsanChunkByAddr(addr - l);
+      if (m2 == m1) continue;  // Still the same chunk.
+      break;
+    }
+    if (m2 && AsanChunkView(m2).AddrIsAtRight(addr, 1, &offset))
+      m1 = ChooseChunk(addr, m2, m1);
+  }
+  return AsanChunkView(m1);
+}
+
+void AsanThreadLocalMallocStorage::CommitBack() {
+  AllocatorCache *ac = GetAllocatorCache(this);
+  quarantine.Drain(GetQuarantineCache(this), QuarantineCallback(ac));
+  allocator.SwallowCache(GetAllocatorCache(this));
+}
+
+void PrintInternalAllocatorStats() {
+  allocator.PrintStats();
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *asan_memalign(uptr alignment, uptr size, StackTrace *stack,
+                    AllocType alloc_type) {
+  return Allocate(size, alignment, stack, alloc_type);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void asan_free(void *ptr, StackTrace *stack, AllocType alloc_type) {
+  Deallocate(ptr, stack, alloc_type);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *asan_malloc(uptr size, StackTrace *stack) {
+  return Allocate(size, 8, stack, FROM_MALLOC);
+}
+
+void *asan_calloc(uptr nmemb, uptr size, StackTrace *stack) {
+  if (CallocShouldReturnNullDueToOverflow(size, nmemb)) return 0;
+  void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC);
+  // If the memory comes from the secondary allocator no need to clear it
+  // as it comes directly from mmap.
+  if (ptr && allocator.FromPrimary(ptr))
+    REAL(memset)(ptr, 0, nmemb * size);
+  return ptr;
+}
+
+void *asan_realloc(void *p, uptr size, StackTrace *stack) {
+  // A null pointer makes this behave as a plain malloc of 'size' bytes.
+  if (!p) return Allocate(size, 8, stack, FROM_MALLOC);
+  // Non-zero size with a live pointer: move the data to a new chunk.
+  if (size != 0) return Reallocate(p, size, stack);
+  // Zero size with a live pointer: release it; the caller gets null back.
+  Deallocate(p, stack, FROM_MALLOC);
+  return 0;
+}
+
+void *asan_valloc(uptr size, StackTrace *stack) {
+  return Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC);
+}
+
+void *asan_pvalloc(uptr size, StackTrace *stack) {
+  // Page-aligned allocation whose length is a whole number of pages.
+  const uptr page = GetPageSizeCached();
+  uptr rounded = RoundUpTo(size, page);
+  // pvalloc(0) should allocate one page.
+  if (rounded == 0)
+    rounded = page;
+  return Allocate(rounded, page, stack, FROM_MALLOC);
+}
+
+int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
+                        StackTrace *stack) {
+  void *ptr = Allocate(size, alignment, stack, FROM_MALLOC);
+  CHECK(IsAligned((uptr)ptr, alignment));
+  *memptr = ptr;
+  return 0;
+}
+
+uptr asan_malloc_usable_size(void *ptr, StackTrace *stack) {
+  CHECK(stack);
+  if (ptr == 0) return 0;
+  uptr usable_size = AllocationSize(reinterpret_cast<uptr>(ptr));
+  if (flags()->check_malloc_usable_size && (usable_size == 0))
+    ReportMallocUsableSizeNotOwned((uptr)ptr, stack);
+  return usable_size;
+}
+
+uptr asan_mz_size(const void *ptr) {
+  return AllocationSize(reinterpret_cast<uptr>(ptr));
+}
+
+void asan_mz_force_lock() {
+  allocator.ForceLock();
+  fallback_mutex.Lock();
+}
+
+void asan_mz_force_unlock() {
+  fallback_mutex.Unlock();
+  allocator.ForceUnlock();
+}
 
 }  // namespace __asan
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+// ASan allocator doesn't reserve extra bytes, so normally we would
+// just return "size". We don't want to expose our redzone sizes, etc here.
+uptr __asan_get_estimated_allocated_size(uptr size) {
+  return size;
+}
+
+bool __asan_get_ownership(const void *p) {
+  uptr ptr = reinterpret_cast<uptr>(p);
+  return (AllocationSize(ptr) > 0);
+}
+
+uptr __asan_get_allocated_size(const void *p) {
+  if (p == 0) return 0;
+  uptr ptr = reinterpret_cast<uptr>(p);
+  uptr allocated_size = AllocationSize(ptr);
+  // Die if p is not malloced or if it is already freed.
+  if (allocated_size == 0) {
+    GET_STACK_TRACE_FATAL_HERE;
+    ReportAsanGetAllocatedSizeNotOwned(ptr, &stack);
+  }
+  return allocated_size;
+}
+
+#if !SANITIZER_SUPPORTS_WEAK_HOOKS
+// Provide default (no-op) implementation of malloc hooks.
+extern "C" {
+SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
+void __asan_malloc_hook(void *ptr, uptr size) {
+  (void)ptr;
+  (void)size;
+}
+SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
+void __asan_free_hook(void *ptr) {
+  (void)ptr;
+}
+}  // extern "C"
+#endif
+
+
 #endif  // ASAN_ALLOCATOR_VERSION
diff --git a/lib/asan/asan_fake_stack.cc b/lib/asan/asan_fake_stack.cc
index 7c5a163..2cede36 100644
--- a/lib/asan/asan_fake_stack.cc
+++ b/lib/asan/asan_fake_stack.cc
@@ -14,7 +14,6 @@
 #include "asan_allocator.h"
 #include "asan_thread.h"
 #include "asan_thread_registry.h"
-#include "sanitizer/asan_interface.h"
 
 namespace __asan {
 
diff --git a/lib/asan/asan_flags.h b/lib/asan/asan_flags.h
index 989784d..377354a 100644
--- a/lib/asan/asan_flags.h
+++ b/lib/asan/asan_flags.h
@@ -15,13 +15,15 @@
 #ifndef ASAN_FLAGS_H
 #define ASAN_FLAGS_H
 
-#include "sanitizer/common_interface_defs.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
 
-// ASan flag values can be defined in three ways:
+// ASan flag values can be defined in four ways:
 // 1) initialized with default values at startup.
-// 2) overriden from string returned by user-specified function
+// 2) overridden during compilation of ASan runtime by providing
+//    compile definition ASAN_DEFAULT_OPTIONS.
+// 3) overridden from string returned by user-specified function
 //    __asan_default_options().
-// 3) overriden from env variable ASAN_OPTIONS.
+// 4) overridden from env variable ASAN_OPTIONS.
 
 namespace __asan {
 
@@ -45,15 +47,13 @@
   int  report_globals;
   // If set, attempts to catch initialization order issues.
   bool check_initialization_order;
-  // Max number of stack frames kept for each allocation.
+  // Max number of stack frames kept for each allocation/deallocation.
   int  malloc_context_size;
   // If set, uses custom wrappers and replacements for libc string functions
   // to find more errors.
   bool replace_str;
   // If set, uses custom wrappers for memset/memcpy/memmove intinsics.
   bool replace_intrin;
-  // Used on Mac only. See comments in asan_mac.cc and asan_malloc_mac.cc.
-  bool replace_cfallocator;
   // Used on Mac only.
   bool mac_ignore_invalid_free;
   // ASan allocator flag. See asan_allocator.cc.
@@ -79,6 +79,10 @@
   bool unmap_shadow_on_exit;
   // If set, calls abort() instead of _exit() after printing an error report.
   bool abort_on_error;
+  // Print various statistics after printing an error message or if atexit=1.
+  bool print_stats;
+  // Print the legend for the shadow bytes.
+  bool print_legend;
   // If set, prints ASan exit stats even after program terminates successfully.
   bool atexit;
   // By default, disable core dumper on 64-bit - it makes little sense
@@ -95,6 +99,20 @@
   bool print_full_thread_history;
   // ASan will write logs to "log_path.pid" instead of stderr.
   const char *log_path;
+  // Use fast (frame-pointer-based) unwinder on fatal errors (if available).
+  bool fast_unwind_on_fatal;
+  // Use fast (frame-pointer-based) unwinder on malloc/free (if available).
+  bool fast_unwind_on_malloc;
+  // Poison (or not) the heap memory on [de]allocation. Zero value is useful
+  // for benchmarking the allocator or instrumentator.
+  bool poison_heap;
+  // Report errors on malloc/delete, new/free, new/delete[], etc.
+  bool alloc_dealloc_mismatch;
+  // Use stack depot instead of storing stacks in the redzones.
+  bool use_stack_depot;
+  // If true, assume that memcmp(p1, p2, n) always reads n bytes before
+  // comparing p1 and p2.
+  bool strict_memcmp;
 };
 
 Flags *flags();
diff --git a/lib/asan/asan_globals.cc b/lib/asan/asan_globals.cc
index 6e90cb5..3101f16 100644
--- a/lib/asan/asan_globals.cc
+++ b/lib/asan/asan_globals.cc
@@ -13,13 +13,12 @@
 //===----------------------------------------------------------------------===//
 #include "asan_interceptors.h"
 #include "asan_internal.h"
-#include "asan_lock.h"
 #include "asan_mapping.h"
 #include "asan_report.h"
 #include "asan_stack.h"
 #include "asan_stats.h"
 #include "asan_thread.h"
-#include "sanitizer/asan_interface.h"
+#include "sanitizer_common/sanitizer_mutex.h"
 
 namespace __asan {
 
@@ -30,46 +29,35 @@
   ListOfGlobals *next;
 };
 
-static AsanLock mu_for_globals(LINKER_INITIALIZED);
+static BlockingMutex mu_for_globals(LINKER_INITIALIZED);
 static LowLevelAllocator allocator_for_globals;
 static ListOfGlobals *list_of_all_globals;
 static ListOfGlobals *list_of_dynamic_init_globals;
 
 void PoisonRedZones(const Global &g)  {
-  uptr shadow_rz_size = kGlobalAndStackRedzone >> SHADOW_SCALE;
-  CHECK(shadow_rz_size == 1 || shadow_rz_size == 2 || shadow_rz_size == 4);
-  // full right redzone
-  uptr g_aligned_size = kGlobalAndStackRedzone *
-      ((g.size + kGlobalAndStackRedzone - 1) / kGlobalAndStackRedzone);
-  PoisonShadow(g.beg + g_aligned_size,
-               kGlobalAndStackRedzone, kAsanGlobalRedzoneMagic);
-  if ((g.size % kGlobalAndStackRedzone) != 0) {
+  uptr aligned_size = RoundUpTo(g.size, SHADOW_GRANULARITY);
+  PoisonShadow(g.beg + aligned_size, g.size_with_redzone - aligned_size,
+               kAsanGlobalRedzoneMagic);
+  if (g.size != aligned_size) {
     // partial right redzone
-    u64 g_aligned_down_size = kGlobalAndStackRedzone *
-        (g.size / kGlobalAndStackRedzone);
-    CHECK(g_aligned_down_size == g_aligned_size - kGlobalAndStackRedzone);
-    PoisonShadowPartialRightRedzone(g.beg + g_aligned_down_size,
-                                    g.size % kGlobalAndStackRedzone,
-                                    kGlobalAndStackRedzone,
-                                    kAsanGlobalRedzoneMagic);
+    PoisonShadowPartialRightRedzone(
+        g.beg + RoundDownTo(g.size, SHADOW_GRANULARITY),
+        g.size % SHADOW_GRANULARITY,
+        SHADOW_GRANULARITY,
+        kAsanGlobalRedzoneMagic);
   }
 }
 
-static uptr GetAlignedSize(uptr size) {
-  return ((size + kGlobalAndStackRedzone - 1) / kGlobalAndStackRedzone)
-      * kGlobalAndStackRedzone;
-}
-
-bool DescribeAddressIfGlobal(uptr addr) {
+bool DescribeAddressIfGlobal(uptr addr, uptr size) {
   if (!flags()->report_globals) return false;
-  ScopedLock lock(&mu_for_globals);
+  BlockingMutexLock lock(&mu_for_globals);
   bool res = false;
   for (ListOfGlobals *l = list_of_all_globals; l; l = l->next) {
     const Global &g = *l->g;
     if (flags()->report_globals >= 2)
       Report("Search Global: beg=%p size=%zu name=%s\n",
              (void*)g.beg, g.size, (char*)g.name);
-    res |= DescribeAddressRelativeToGlobal(addr, g);
+    res |= DescribeAddressRelativeToGlobal(addr, size, g);
   }
   return res;
 }
@@ -142,23 +130,10 @@
 // ---------------------- Interface ---------------- {{{1
 using namespace __asan;  // NOLINT
 
-// Register one global with a default redzone.
-void __asan_register_global(uptr addr, uptr size,
-                            const char *name) {
-  if (!flags()->report_globals) return;
-  ScopedLock lock(&mu_for_globals);
-  Global *g = (Global *)allocator_for_globals.Allocate(sizeof(Global));
-  g->beg = addr;
-  g->size = size;
-  g->size_with_redzone = GetAlignedSize(size) + kGlobalAndStackRedzone;
-  g->name = name;
-  RegisterGlobal(g);
-}
-
 // Register an array of globals.
 void __asan_register_globals(__asan_global *globals, uptr n) {
   if (!flags()->report_globals) return;
-  ScopedLock lock(&mu_for_globals);
+  BlockingMutexLock lock(&mu_for_globals);
   for (uptr i = 0; i < n; i++) {
     RegisterGlobal(&globals[i]);
   }
@@ -168,7 +143,7 @@
 // We must do this when a shared objects gets dlclosed.
 void __asan_unregister_globals(__asan_global *globals, uptr n) {
   if (!flags()->report_globals) return;
-  ScopedLock lock(&mu_for_globals);
+  BlockingMutexLock lock(&mu_for_globals);
   for (uptr i = 0; i < n; i++) {
     UnregisterGlobal(&globals[i]);
   }
@@ -181,7 +156,7 @@
 void __asan_before_dynamic_init(uptr first_addr, uptr last_addr) {
   if (!flags()->check_initialization_order) return;
   CHECK(list_of_dynamic_init_globals);
-  ScopedLock lock(&mu_for_globals);
+  BlockingMutexLock lock(&mu_for_globals);
   bool from_current_tu = false;
   // The list looks like:
   // a => ... => b => last_addr => ... => first_addr => c => ...
@@ -202,7 +177,7 @@
 // TU are poisoned.  It simply unpoisons all dynamically initialized globals.
 void __asan_after_dynamic_init() {
   if (!flags()->check_initialization_order) return;
-  ScopedLock lock(&mu_for_globals);
+  BlockingMutexLock lock(&mu_for_globals);
   for (ListOfGlobals *l = list_of_dynamic_init_globals; l; l = l->next)
     UnpoisonGlobal(l->g);
 }
diff --git a/lib/asan/asan_intercepted_functions.h b/lib/asan/asan_intercepted_functions.h
index 60b05e6..d529560 100644
--- a/lib/asan/asan_intercepted_functions.h
+++ b/lib/asan/asan_intercepted_functions.h
@@ -16,6 +16,10 @@
 
 #include "asan_internal.h"
 #include "interception/interception.h"
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
+
+#include <stdarg.h>
+#include <stddef.h>
 
 using __sanitizer::uptr;
 
@@ -41,10 +45,8 @@
 
 #if defined(__linux__)
 # define ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX 1
-# define ASAN_INTERCEPT_PRCTL 1
 #else
 # define ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX 0
-# define ASAN_INTERCEPT_PRCTL 0
 #endif
 
 #if !defined(__APPLE__)
@@ -65,9 +67,7 @@
 # define ASAN_INTERCEPT_SIGNAL_AND_SIGACTION 0
 #endif
 
-// On Darwin siglongjmp tailcalls longjmp, so we don't want to intercept it
-// there.
-#if !defined(_WIN32) && (!defined(__APPLE__) || MAC_INTERPOSE_FUNCTIONS)
+#if !defined(_WIN32)
 # define ASAN_INTERCEPT_SIGLONGJMP 1
 #else
 # define ASAN_INTERCEPT_SIGLONGJMP 0
@@ -79,149 +79,31 @@
 # define ASAN_INTERCEPT___CXA_THROW 0
 #endif
 
-#define DECLARE_FUNCTION_AND_WRAPPER(ret_type, func, ...) \
-  ret_type func(__VA_ARGS__); \
-  ret_type WRAP(func)(__VA_ARGS__)
-
-// Use extern declarations of intercepted functions on Mac and Windows
-// to avoid including system headers.
-#if defined(__APPLE__) || (defined(_WIN32) && !defined(_DLL))
-extern "C" {
-// signal.h
-# if ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
-struct sigaction;
-DECLARE_FUNCTION_AND_WRAPPER(int, sigaction, int sig,
-              const struct sigaction *act,
-              struct sigaction *oldact);
-DECLARE_FUNCTION_AND_WRAPPER(void*, signal, int signum, void *handler);
-# endif
-
-// setjmp.h
-DECLARE_FUNCTION_AND_WRAPPER(void, longjmp, void *env, int value);
-# if ASAN_INTERCEPT__LONGJMP
-DECLARE_FUNCTION_AND_WRAPPER(void, _longjmp, void *env, int value);
-# endif
-# if ASAN_INTERCEPT_SIGLONGJMP
-DECLARE_FUNCTION_AND_WRAPPER(void, siglongjmp, void *env, int value);
-# endif
-# if ASAN_INTERCEPT___CXA_THROW
-DECLARE_FUNCTION_AND_WRAPPER(void, __cxa_throw, void *a, void *b, void *c);
-#endif
-
-// string.h / strings.h
-DECLARE_FUNCTION_AND_WRAPPER(int, memcmp,
-                             const void *a1, const void *a2, uptr size);
-DECLARE_FUNCTION_AND_WRAPPER(void*, memmove,
-                             void *to, const void *from, uptr size);
-DECLARE_FUNCTION_AND_WRAPPER(void*, memcpy,
-                             void *to, const void *from, uptr size);
-DECLARE_FUNCTION_AND_WRAPPER(void*, memset, void *block, int c, uptr size);
-DECLARE_FUNCTION_AND_WRAPPER(char*, strchr, const char *str, int c);
-DECLARE_FUNCTION_AND_WRAPPER(char*, strcat,  /* NOLINT */
-                             char *to, const char* from);
-DECLARE_FUNCTION_AND_WRAPPER(char*, strncat,
-                             char *to, const char* from, uptr size);
-DECLARE_FUNCTION_AND_WRAPPER(char*, strcpy,  /* NOLINT */
-                             char *to, const char* from);
-DECLARE_FUNCTION_AND_WRAPPER(char*, strncpy,
-                             char *to, const char* from, uptr size);
-DECLARE_FUNCTION_AND_WRAPPER(int, strcmp, const char *s1, const char* s2);
-DECLARE_FUNCTION_AND_WRAPPER(int, strncmp,
-                             const char *s1, const char* s2, uptr size);
-DECLARE_FUNCTION_AND_WRAPPER(uptr, strlen, const char *s);
-# if ASAN_INTERCEPT_STRCASECMP_AND_STRNCASECMP
-DECLARE_FUNCTION_AND_WRAPPER(int, strcasecmp, const char *s1, const char *s2);
-DECLARE_FUNCTION_AND_WRAPPER(int, strncasecmp,
-                             const char *s1, const char *s2, uptr n);
-# endif
-# if ASAN_INTERCEPT_STRDUP
-DECLARE_FUNCTION_AND_WRAPPER(char*, strdup, const char *s);
-# endif
-# if ASAN_INTERCEPT_STRNLEN
-DECLARE_FUNCTION_AND_WRAPPER(uptr, strnlen, const char *s, uptr maxlen);
-# endif
-#if ASAN_INTERCEPT_INDEX
-DECLARE_FUNCTION_AND_WRAPPER(char*, index, const char *string, int c);
-#endif
-
-// stdlib.h
-DECLARE_FUNCTION_AND_WRAPPER(int, atoi, const char *nptr);
-DECLARE_FUNCTION_AND_WRAPPER(long, atol, const char *nptr);  // NOLINT
-DECLARE_FUNCTION_AND_WRAPPER(long, strtol, const char *nptr, char **endptr, int base);  // NOLINT
-# if ASAN_INTERCEPT_ATOLL_AND_STRTOLL
-DECLARE_FUNCTION_AND_WRAPPER(long long, atoll, const char *nptr);  // NOLINT
-DECLARE_FUNCTION_AND_WRAPPER(long long, strtoll, const char *nptr, char **endptr, int base);  // NOLINT
-# endif
-
-# if ASAN_INTERCEPT_MLOCKX
-// mlock/munlock
-DECLARE_FUNCTION_AND_WRAPPER(int, mlock, const void *addr, size_t len);
-DECLARE_FUNCTION_AND_WRAPPER(int, munlock, const void *addr, size_t len);
-DECLARE_FUNCTION_AND_WRAPPER(int, mlockall, int flags);
-DECLARE_FUNCTION_AND_WRAPPER(int, munlockall, void);
-# endif
-
 // Windows threads.
 # if defined(_WIN32)
+extern "C" {
 __declspec(dllimport)
 void* __stdcall CreateThread(void *sec, uptr st, void* start,
                              void *arg, DWORD fl, DWORD *id);
+
+int memcmp(const void *a1, const void *a2, uptr size);
+void* memmove(void *to, const void *from, uptr size);
+void* memset(void *block, int c, uptr size);
+void* memcpy(void *to, const void *from, uptr size);
+char* strcat(char *to, const char* from);  // NOLINT
+char* strchr(const char *str, int c);
+int strcmp(const char *s1, const char* s2);
+char* strcpy(char *to, const char* from);  // NOLINT
+uptr strlen(const char *s);
+char* strncat(char *to, const char* from, uptr size);
+int strncmp(const char *s1, const char* s2, uptr size);
+char* strncpy(char *to, const char* from, uptr size);
+uptr strnlen(const char *s, uptr maxlen);
+int atoi(const char *nptr);
+long atol(const char *nptr);  // NOLINT
+long strtol(const char *nptr, char **endptr, int base);  // NOLINT
+void longjmp(void *env, int value);
+}
 # endif
-// Posix threads.
-# if ASAN_INTERCEPT_PTHREAD_CREATE
-DECLARE_FUNCTION_AND_WRAPPER(int, pthread_create,
-                             void *thread, void *attr,
-                             void *(*start_routine)(void*), void *arg);
-# endif
-
-#if defined(__APPLE__)
-typedef void* pthread_workqueue_t;
-typedef void* pthread_workitem_handle_t;
-
-typedef void* dispatch_group_t;
-typedef void* dispatch_queue_t;
-typedef void* dispatch_source_t;
-typedef u64 dispatch_time_t;
-typedef void (*dispatch_function_t)(void *block);
-typedef void* (*worker_t)(void *block);
-typedef void* CFStringRef;
-typedef void* CFAllocatorRef;
-
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_async_f,
-                             dispatch_queue_t dq,
-                             void *ctxt, dispatch_function_t func);
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_sync_f,
-                             dispatch_queue_t dq,
-                             void *ctxt, dispatch_function_t func);
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_after_f,
-                             dispatch_time_t when, dispatch_queue_t dq,
-                             void *ctxt, dispatch_function_t func);
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_barrier_async_f,
-                             dispatch_queue_t dq,
-                             void *ctxt, dispatch_function_t func);
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_group_async_f,
-                             dispatch_group_t group, dispatch_queue_t dq,
-                             void *ctxt, dispatch_function_t func);
-
-DECLARE_FUNCTION_AND_WRAPPER(void, __CFInitialize, void);
-DECLARE_FUNCTION_AND_WRAPPER(CFStringRef, CFStringCreateCopy,
-                             CFAllocatorRef alloc, CFStringRef str);
-DECLARE_FUNCTION_AND_WRAPPER(void, free, void* ptr);
-#if MAC_INTERPOSE_FUNCTIONS && !defined(MISSING_BLOCKS_SUPPORT)
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_group_async,
-                             dispatch_group_t dg,
-                             dispatch_queue_t dq, void (^work)(void));
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_async,
-                             dispatch_queue_t dq, void (^work)(void));
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_after,
-                             dispatch_queue_t dq, void (^work)(void));
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_source_set_event_handler,
-                             dispatch_source_t ds, void (^work)(void));
-DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_source_set_cancel_handler,
-                             dispatch_source_t ds, void (^work)(void));
-#endif  // MAC_INTERPOSE_FUNCTIONS
-#endif  // __APPLE__
-}  // extern "C"
-#endif
 
 #endif  // ASAN_INTERCEPTED_FUNCTIONS_H
diff --git a/lib/asan/asan_interceptors.cc b/lib/asan/asan_interceptors.cc
index 2eecede..b1efe74 100644
--- a/lib/asan/asan_interceptors.cc
+++ b/lib/asan/asan_interceptors.cc
@@ -22,43 +22,38 @@
 #include "asan_stats.h"
 #include "asan_thread_registry.h"
 #include "interception/interception.h"
-#include "sanitizer/asan_interface.h"
 #include "sanitizer_common/sanitizer_libc.h"
 
 namespace __asan {
 
-// Instruments read/write access to a single byte in memory.
-// On error calls __asan_report_error, which aborts the program.
-#define ACCESS_ADDRESS(address, isWrite)   do {         \
-  if (!AddrIsInMem(address) || AddressIsPoisoned(address)) {                \
-    GET_CURRENT_PC_BP_SP;                               \
-    __asan_report_error(pc, bp, sp, address, isWrite, /* access_size */ 1); \
-  } \
-} while (0)
+// Return true if we can quickly decide that the region is unpoisoned.
+static inline bool QuickCheckForUnpoisonedRegion(uptr beg, uptr size) {
+  if (size == 0) return true;
+  if (size <= 32)
+    return !AddressIsPoisoned(beg) &&
+           !AddressIsPoisoned(beg + size - 1) &&
+           !AddressIsPoisoned(beg + size / 2);
+  return false;
+}
 
 // We implement ACCESS_MEMORY_RANGE, ASAN_READ_RANGE,
 // and ASAN_WRITE_RANGE as macro instead of function so
 // that no extra frames are created, and stack trace contains
 // relevant information only.
+// We check all shadow bytes.
+#define ACCESS_MEMORY_RANGE(offset, size, isWrite) do {                 \
+    uptr __offset = (uptr)(offset);                                     \
+    uptr __size = (uptr)(size);                                         \
+    uptr __bad = 0;                                                     \
+    if (!QuickCheckForUnpoisonedRegion(__offset, __size) &&             \
+        (__bad = __asan_region_is_poisoned(__offset, __size))) {        \
+      GET_CURRENT_PC_BP_SP;                                             \
+      __asan_report_error(pc, bp, sp, __bad, isWrite, __size);          \
+    }                                                                   \
+  } while (0)
 
-// Instruments read/write access to a memory range.
-// More complex implementation is possible, for now just
-// checking the first and the last byte of a range.
-#define ACCESS_MEMORY_RANGE(offset, size, isWrite) do { \
-  if (size > 0) { \
-    uptr ptr = (uptr)(offset); \
-    ACCESS_ADDRESS(ptr, isWrite); \
-    ACCESS_ADDRESS(ptr + (size) - 1, isWrite); \
-  } \
-} while (0)
-
-#define ASAN_READ_RANGE(offset, size) do { \
-  ACCESS_MEMORY_RANGE(offset, size, false); \
-} while (0)
-
-#define ASAN_WRITE_RANGE(offset, size) do { \
-  ACCESS_MEMORY_RANGE(offset, size, true); \
-} while (0)
+#define ASAN_READ_RANGE(offset, size) ACCESS_MEMORY_RANGE(offset, size, false)
+#define ASAN_WRITE_RANGE(offset, size) ACCESS_MEMORY_RANGE(offset, size, true)
 
 // Behavior of functions like "memcpy" or "strcpy" is undefined
 // if memory intervals overlap. We report error in this case.
@@ -71,7 +66,7 @@
   const char *offset1 = (const char*)_offset1; \
   const char *offset2 = (const char*)_offset2; \
   if (RangesOverlap(offset1, length1, offset2, length2)) { \
-    GET_STACK_TRACE_HERE(kStackTraceMax); \
+    GET_STACK_TRACE_FATAL_HERE; \
     ReportStringFunctionMemoryRangesOverlap(name, offset1, length1, \
                                             offset2, length2, &stack); \
   } \
@@ -93,11 +88,31 @@
   return internal_strnlen(s, maxlen);
 }
 
+void SetThreadName(const char *name) {
+  AsanThread *t = asanThreadRegistry().GetCurrent();
+  if (t)
+    t->summary()->set_name(name);
+}
+
 }  // namespace __asan
 
 // ---------------------- Wrappers ---------------- {{{1
 using namespace __asan;  // NOLINT
 
+#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
+  ASAN_WRITE_RANGE(ptr, size)
+#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) ASAN_READ_RANGE(ptr, size)
+#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
+  do {                                           \
+    ctx = 0;                                     \
+    (void)ctx;                                   \
+    ENSURE_ASAN_INITED();                        \
+  } while (false)
+#define COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd) do { } while (false)
+#define COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd) do { } while (false)
+#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) SetThreadName(name)
+#include "sanitizer_common/sanitizer_common_interceptors.inc"
+
 static thread_return_t THREAD_CALLING_CONV asan_thread_start(void *arg) {
   AsanThread *t = (AsanThread*)arg;
   asanThreadRegistry().SetCurrent(t);
@@ -107,7 +122,7 @@
 #if ASAN_INTERCEPT_PTHREAD_CREATE
 INTERCEPTOR(int, pthread_create, void *thread,
     void *attr, void *(*start_routine)(void*), void *arg) {
-  GET_STACK_TRACE_HERE(kStackTraceMax);
+  GET_STACK_TRACE_THREAD;
   u32 current_tid = asanThreadRegistry().GetCurrentTidOrInvalid();
   AsanThread *t = AsanThread::Create(current_tid, start_routine, arg, &stack);
   asanThreadRegistry().RegisterThread(t);
@@ -137,6 +152,18 @@
 #endif  // ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
 
 #if ASAN_INTERCEPT_SWAPCONTEXT
+static void ClearShadowMemoryForContextStack(uptr stack, uptr ssize) {
+  // Align to page size.
+  uptr PageSize = GetPageSizeCached();
+  uptr bottom = stack & ~(PageSize - 1);
+  ssize += stack - bottom;
+  ssize = RoundUpTo(ssize, PageSize);
+  static const uptr kMaxSaneContextStackSize = 1 << 22;  // 4 Mb
+  if (ssize && ssize <= kMaxSaneContextStackSize) {
+    PoisonShadow(bottom, ssize, 0);
+  }
+}
+
 INTERCEPTOR(int, swapcontext, struct ucontext_t *oucp,
             struct ucontext_t *ucp) {
   static bool reported_warning = false;
@@ -147,16 +174,18 @@
   }
   // Clear shadow memory for new context (it may share stack
   // with current context).
-  ClearShadowMemoryForContext(ucp);
+  uptr stack, ssize;
+  ReadContextStack(ucp, &stack, &ssize);
+  ClearShadowMemoryForContextStack(stack, ssize);
   int res = REAL(swapcontext)(oucp, ucp);
   // swapcontext technically does not return, but program may swap context to
   // "oucp" later, that would look as if swapcontext() returned 0.
   // We need to clear shadow for ucp once again, as it may be in arbitrary
   // state.
-  ClearShadowMemoryForContext(ucp);
+  ClearShadowMemoryForContextStack(stack, ssize);
   return res;
 }
-#endif
+#endif  // ASAN_INTERCEPT_SWAPCONTEXT
 
 INTERCEPTOR(void, longjmp, void *env, int val) {
   __asan_handle_no_return();
@@ -177,25 +206,6 @@
 }
 #endif
 
-#if ASAN_INTERCEPT_PRCTL
-#define PR_SET_NAME 15
-INTERCEPTOR(int, prctl, int option,
-            unsigned long arg2, unsigned long arg3,  // NOLINT
-            unsigned long arg4, unsigned long arg5) {  // NOLINT
-  int res = REAL(prctl(option, arg2, arg3, arg4, arg5));
-  if (option == PR_SET_NAME) {
-    AsanThread *t = asanThreadRegistry().GetCurrent();
-    if (t) {
-      char buff[17];
-      internal_strncpy(buff, (char*)arg2, 16);
-      buff[16] = 0;
-      t->summary()->set_name(buff);
-    }
-  }
-  return res;
-}
-#endif
-
 #if ASAN_INTERCEPT___CXA_THROW
 INTERCEPTOR(void, __cxa_throw, void *a, void *b, void *c) {
   CHECK(REAL(__cxa_throw));
@@ -249,18 +259,29 @@
 INTERCEPTOR(int, memcmp, const void *a1, const void *a2, uptr size) {
   if (!asan_inited) return internal_memcmp(a1, a2, size);
   ENSURE_ASAN_INITED();
-  unsigned char c1 = 0, c2 = 0;
-  const unsigned char *s1 = (const unsigned char*)a1;
-  const unsigned char *s2 = (const unsigned char*)a2;
-  uptr i;
-  for (i = 0; i < size; i++) {
-    c1 = s1[i];
-    c2 = s2[i];
-    if (c1 != c2) break;
+  if (flags()->replace_intrin) {
+    if (flags()->strict_memcmp) {
+      // Check the entire regions even if the first bytes of the buffers are
+      // different.
+      ASAN_READ_RANGE(a1, size);
+      ASAN_READ_RANGE(a2, size);
+      // Fallthrough to REAL(memcmp) below.
+    } else {
+      unsigned char c1 = 0, c2 = 0;
+      const unsigned char *s1 = (const unsigned char*)a1;
+      const unsigned char *s2 = (const unsigned char*)a2;
+      uptr i;
+      for (i = 0; i < size; i++) {
+        c1 = s1[i];
+        c2 = s2[i];
+        if (c1 != c2) break;
+      }
+      ASAN_READ_RANGE(s1, Min(i + 1, size));
+      ASAN_READ_RANGE(s2, Min(i + 1, size));
+      return CharCmp(c1, c2);
+    }
   }
-  ASAN_READ_RANGE(s1, Min(i + 1, size));
-  ASAN_READ_RANGE(s2, Min(i + 1, size));
-  return CharCmp(c1, c2);
+  return REAL(memcmp)(a1, a2, size);
 }
 
 INTERCEPTOR(void*, memcpy, void *to, const void *from, uptr size) {
@@ -280,13 +301,9 @@
     ASAN_READ_RANGE(from, size);
     ASAN_WRITE_RANGE(to, size);
   }
-#if MAC_INTERPOSE_FUNCTIONS
   // Interposing of resolver functions is broken on Mac OS 10.7 and 10.8.
   // See also http://code.google.com/p/address-sanitizer/issues/detail?id=116.
   return internal_memcpy(to, from, size);
-#else
-  return REAL(memcpy)(to, from, size);
-#endif
 }
 
 INTERCEPTOR(void*, memmove, void *to, const void *from, uptr size) {
@@ -299,13 +316,9 @@
     ASAN_READ_RANGE(from, size);
     ASAN_WRITE_RANGE(to, size);
   }
-#if MAC_INTERPOSE_FUNCTIONS
   // Interposing of resolver functions is broken on Mac OS 10.7 and 10.8.
   // See also http://code.google.com/p/address-sanitizer/issues/detail?id=116.
   return internal_memmove(to, from, size);
-#else
-  return REAL(memmove)(to, from, size);
-#endif
 }
 
 INTERCEPTOR(void*, memset, void *block, int c, uptr size) {
@@ -342,7 +355,12 @@
 INTERCEPTOR(char*, index, const char *string, int c)
   ALIAS(WRAPPER_NAME(strchr));
 # else
-DEFINE_REAL(char*, index, const char *string, int c)
+#  if defined(__APPLE__)
+DECLARE_REAL(char*, index, const char *string, int c)
+OVERRIDE_FUNCTION(index, strchr);
+#  else
+DEFINE_REAL(char*, index, const char *string, int c);
+#  endif
 # endif
 #endif  // ASAN_INTERCEPT_INDEX
 
@@ -403,7 +421,7 @@
 }
 
 INTERCEPTOR(char*, strcpy, char *to, const char *from) {  // NOLINT
-#if MAC_INTERPOSE_FUNCTIONS
+#if defined(__APPLE__)
   if (!asan_inited) return REAL(strcpy)(to, from);  // NOLINT
 #endif
   // strcpy is called from malloc_default_purgeable_zone()
@@ -423,7 +441,7 @@
 
 #if ASAN_INTERCEPT_STRDUP
 INTERCEPTOR(char*, strdup, const char *s) {
-#if MAC_INTERPOSE_FUNCTIONS
+#if defined(__APPLE__)
   // FIXME: because internal_strdup() uses InternalAlloc(), which currently
   // just calls malloc() on Mac, we can't use internal_strdup() with the
   // dynamic runtime. We can remove the call to REAL(strdup) once InternalAlloc
@@ -564,7 +582,7 @@
 }
 
 INTERCEPTOR(int, atoi, const char *nptr) {
-#if MAC_INTERPOSE_FUNCTIONS
+#if defined(__APPLE__)
   if (!asan_inited) return REAL(atoi)(nptr);
 #endif
   ENSURE_ASAN_INITED();
@@ -583,7 +601,7 @@
 }
 
 INTERCEPTOR(long, atol, const char *nptr) {  // NOLINT
-#if MAC_INTERPOSE_FUNCTIONS
+#if defined(__APPLE__)
   if (!asan_inited) return REAL(atol)(nptr);
 #endif
   ENSURE_ASAN_INITED();
@@ -642,7 +660,7 @@
                    void* security, uptr stack_size,
                    DWORD (__stdcall *start_routine)(void*), void* arg,
                    DWORD flags, void* tid) {
-  GET_STACK_TRACE_HERE(kStackTraceMax);
+  GET_STACK_TRACE_THREAD;
   u32 current_tid = asanThreadRegistry().GetCurrentTidOrInvalid();
   AsanThread *t = AsanThread::Create(current_tid, start_routine, arg, &stack);
   asanThreadRegistry().RegisterThread(t);
@@ -664,21 +682,17 @@
   static bool was_called_once;
   CHECK(was_called_once == false);
   was_called_once = true;
-#if MAC_INTERPOSE_FUNCTIONS
+#if defined(__APPLE__)
   return;
-#endif
+#else
+  SANITIZER_COMMON_INTERCEPTORS_INIT;
+
   // Intercept mem* functions.
   ASAN_INTERCEPT_FUNC(memcmp);
   ASAN_INTERCEPT_FUNC(memmove);
   ASAN_INTERCEPT_FUNC(memset);
   if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) {
     ASAN_INTERCEPT_FUNC(memcpy);
-  } else {
-#if !MAC_INTERPOSE_FUNCTIONS
-    // If we're using dynamic interceptors on Mac, these two are just plain
-    // functions.
-    internal_memcpy(&REAL(memcpy), &REAL(memmove), sizeof(REAL(memmove)));
-#endif
   }
 
   // Intercept str* functions.
@@ -700,12 +714,8 @@
 #if ASAN_INTERCEPT_STRNLEN
   ASAN_INTERCEPT_FUNC(strnlen);
 #endif
-#if ASAN_INTERCEPT_INDEX
-# if ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
+#if ASAN_INTERCEPT_INDEX && ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
   ASAN_INTERCEPT_FUNC(index);
-# else
-  CHECK(OVERRIDE_FUNCTION(index, WRAP(strchr)));
-# endif
 #endif
 
   ASAN_INTERCEPT_FUNC(atoi);
@@ -739,9 +749,6 @@
 #if ASAN_INTERCEPT_SIGLONGJMP
   ASAN_INTERCEPT_FUNC(siglongjmp);
 #endif
-#if ASAN_INTERCEPT_PRCTL
-  ASAN_INTERCEPT_FUNC(prctl);
-#endif
 
   // Intercept exception handling functions.
 #if ASAN_INTERCEPT___CXA_THROW
@@ -758,14 +765,10 @@
   InitializeWindowsInterceptors();
 #endif
 
-  // Some Mac-specific interceptors.
-#if defined(__APPLE__)
-  InitializeMacInterceptors();
-#endif
-
   if (flags()->verbosity > 0) {
     Report("AddressSanitizer: libc interceptors initialized\n");
   }
+#endif  // __APPLE__
 }
 
 }  // namespace __asan
diff --git a/lib/asan/asan_interceptors.h b/lib/asan/asan_interceptors.h
index 3b3e90e..91830aa 100644
--- a/lib/asan/asan_interceptors.h
+++ b/lib/asan/asan_interceptors.h
@@ -32,9 +32,6 @@
 namespace __asan {
 
 void InitializeAsanInterceptors();
-#if defined(__APPLE__)
-void InitializeMacInterceptors();
-#endif  // __APPLE__
 
 }  // namespace __asan
 
diff --git a/lib/asan/asan_interface_internal.h b/lib/asan/asan_interface_internal.h
new file mode 100644
index 0000000..48220e7
--- /dev/null
+++ b/lib/asan/asan_interface_internal.h
@@ -0,0 +1,135 @@
+//===-- asan_interface_internal.h -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// This header can be included by the instrumented program to fetch
+// data (mostly allocator statistics) from ASan runtime library.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_INTERFACE_INTERNAL_H
+#define ASAN_INTERFACE_INTERNAL_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+using __sanitizer::uptr;
+
+extern "C" {
+  // This function should be called at the very beginning of the process,
+  // before any instrumented code is executed and before any call to malloc.
+  // Every time the asan ABI changes we also change the version number in this
+  // name. Objects built with an incompatible asan ABI version
+  // will not link with the run-time.
+  void __asan_init_v1() SANITIZER_INTERFACE_ATTRIBUTE;
+  #define __asan_init __asan_init_v1
+
+  // This structure describes an instrumented global variable.
+  struct __asan_global {
+    uptr beg;                // The address of the global.
+    uptr size;               // The original size of the global.
+    uptr size_with_redzone;  // The size with the redzone.
+    const char *name;        // Name as a C string.
+    uptr has_dynamic_init;   // Non-zero if the global has dynamic initializer.
+  };
+
+  // These two functions should be called by the instrumented code.
+  // 'globals' is an array of structures describing 'n' globals.
+  void __asan_register_globals(__asan_global *globals, uptr n)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_unregister_globals(__asan_global *globals, uptr n)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  // These two functions should be called before and after dynamic initializers
+  // run, respectively.  They should be called with parameters describing all
+  // dynamically initialized globals defined in the calling TU.
+  void __asan_before_dynamic_init(uptr first_addr, uptr last_addr)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_after_dynamic_init()
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  // These two functions are used by the instrumented code in the
+  // use-after-return mode. __asan_stack_malloc allocates size bytes of
+  // fake stack and __asan_stack_free poisons it. real_stack is a pointer to
+  // the real stack region.
+  uptr __asan_stack_malloc(uptr size, uptr real_stack)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_stack_free(uptr ptr, uptr size, uptr real_stack)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  // These two functions are used by instrumented code in the
+  // use-after-scope mode. They mark memory for local variables as
+  // unaddressable when they leave scope and addressable before the
+  // function exits.
+  void __asan_poison_stack_memory(uptr addr, uptr size)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_unpoison_stack_memory(uptr addr, uptr size)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  // Performs cleanup before a NoReturn function. Must be called before things
+  // like _exit and execl to avoid false positives on stack.
+  void __asan_handle_no_return() SANITIZER_INTERFACE_ATTRIBUTE;
+
+  void __asan_poison_memory_region(void const volatile *addr, uptr size)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_unpoison_memory_region(void const volatile *addr, uptr size)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  bool __asan_address_is_poisoned(void const volatile *addr)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  uptr __asan_region_is_poisoned(uptr beg, uptr size)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  void __asan_describe_address(uptr addr)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  void __asan_report_error(uptr pc, uptr bp, uptr sp,
+                           uptr addr, bool is_write, uptr access_size)
+    SANITIZER_INTERFACE_ATTRIBUTE;
+
+  int __asan_set_error_exit_code(int exit_code)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_set_death_callback(void (*callback)(void))
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_set_error_report_callback(void (*callback)(const char*))
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  /* OPTIONAL */ void __asan_on_error()
+      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+
+  /* OPTIONAL */ bool __asan_symbolize(const void *pc, char *out_buffer,
+                                       int out_size)
+      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+
+  uptr __asan_get_estimated_allocated_size(uptr size)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  bool __asan_get_ownership(const void *p)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  uptr __asan_get_allocated_size(const void *p)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  uptr __asan_get_current_allocated_bytes()
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  uptr __asan_get_heap_size()
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  uptr __asan_get_free_bytes()
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  uptr __asan_get_unmapped_bytes()
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_print_accumulated_stats()
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  /* OPTIONAL */ const char* __asan_default_options()
+      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+
+  /* OPTIONAL */ void __asan_malloc_hook(void *ptr, uptr size)
+      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+  /* OPTIONAL */ void __asan_free_hook(void *ptr)
+      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+}  // extern "C"
+
+#endif  // ASAN_INTERFACE_INTERNAL_H
diff --git a/lib/asan/asan_internal.h b/lib/asan/asan_internal.h
index 468d997..1edd8a7 100644
--- a/lib/asan/asan_internal.h
+++ b/lib/asan/asan_internal.h
@@ -15,6 +15,7 @@
 #define ASAN_INTERNAL_H
 
 #include "asan_flags.h"
+#include "asan_interface_internal.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
@@ -53,7 +54,7 @@
 
 #define ASAN_POSIX (ASAN_LINUX || ASAN_MAC)
 
-#if __has_feature(address_sanitizer)
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
 # error "The AddressSanitizer run-time should not be"
         " instrumented by AddressSanitizer"
 #endif
@@ -83,13 +84,17 @@
 // If set, values like allocator chunk size, as well as defaults for some flags
 // will be changed towards less memory overhead.
 #ifndef ASAN_LOW_MEMORY
-# ifdef ASAN_ANDROID
+#if SANITIZER_WORDSIZE == 32
 #  define ASAN_LOW_MEMORY 1
-# else
+#else
 #  define ASAN_LOW_MEMORY 0
 # endif
 #endif
 
+#ifndef ASAN_USE_PREINIT_ARRAY
+# define ASAN_USE_PREINIT_ARRAY (ASAN_LINUX && !ASAN_ANDROID)
+#endif
+
 // All internal functions in asan reside inside the __asan namespace
 // to avoid namespace collisions with the user programs.
 // Seperate namespace also makes it simpler to distinguish the asan run-time
@@ -116,7 +121,7 @@
 void SetAlternateSignalStack();
 void UnsetAlternateSignalStack();
 void InstallSignalHandlers();
-void ClearShadowMemoryForContext(void *context);
+void ReadContextStack(void *context, uptr *stack, uptr *ssize);
 void AsanPlatformThreadInit();
 
 // Wrapper for TLS/TSD.
diff --git a/lib/asan/asan_linux.cc b/lib/asan/asan_linux.cc
index 068f7b1..845493d 100644
--- a/lib/asan/asan_linux.cc
+++ b/lib/asan/asan_linux.cc
@@ -15,7 +15,6 @@
 
 #include "asan_interceptors.h"
 #include "asan_internal.h"
-#include "asan_lock.h"
 #include "asan_thread.h"
 #include "asan_thread_registry.h"
 #include "sanitizer_common/sanitizer_libc.h"
@@ -102,90 +101,32 @@
   // Nothing here for now.
 }
 
-AsanLock::AsanLock(LinkerInitialized) {
-  // We assume that pthread_mutex_t initialized to all zeroes is a valid
-  // unlocked mutex. We can not use PTHREAD_MUTEX_INITIALIZER as it triggers
-  // a gcc warning:
-  // extended initializer lists only available with -std=c++0x or -std=gnu++0x
-}
-
-void AsanLock::Lock() {
-  CHECK(sizeof(pthread_mutex_t) <= sizeof(opaque_storage_));
-  pthread_mutex_lock((pthread_mutex_t*)&opaque_storage_);
-  CHECK(!owner_);
-  owner_ = (uptr)pthread_self();
-}
-
-void AsanLock::Unlock() {
-  CHECK(owner_ == (uptr)pthread_self());
-  owner_ = 0;
-  pthread_mutex_unlock((pthread_mutex_t*)&opaque_storage_);
-}
-
-#ifdef __arm__
-#define UNWIND_STOP _URC_END_OF_STACK
-#define UNWIND_CONTINUE _URC_NO_REASON
-#else
-#define UNWIND_STOP _URC_NORMAL_STOP
-#define UNWIND_CONTINUE _URC_NO_REASON
-#endif
-
-uptr Unwind_GetIP(struct _Unwind_Context *ctx) {
-#ifdef __arm__
-  uptr val;
-  _Unwind_VRS_Result res = _Unwind_VRS_Get(ctx, _UVRSC_CORE,
-      15 /* r15 = PC */, _UVRSD_UINT32, &val);
-  CHECK(res == _UVRSR_OK && "_Unwind_VRS_Get failed");
-  // Clear the Thumb bit.
-  return val & ~(uptr)1;
-#else
-  return _Unwind_GetIP(ctx);
-#endif
-}
-
-_Unwind_Reason_Code Unwind_Trace(struct _Unwind_Context *ctx,
-    void *param) {
-  StackTrace *b = (StackTrace*)param;
-  CHECK(b->size < b->max_size);
-  uptr pc = Unwind_GetIP(ctx);
-  b->trace[b->size++] = pc;
-  if (b->size == b->max_size) return UNWIND_STOP;
-  return UNWIND_CONTINUE;
-}
-
-void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp) {
-  stack->size = 0;
-  stack->trace[0] = pc;
-  if ((max_s) > 1) {
-    stack->max_size = max_s;
+void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp, bool fast) {
 #if defined(__arm__) || \
     defined(__powerpc__) || defined(__powerpc64__) || \
     defined(__sparc__)
-    _Unwind_Backtrace(Unwind_Trace, stack);
-    // Pop off the two ASAN functions from the backtrace.
-    stack->PopStackFrames(2);
-#else
+  fast = false;  // On these targets the caller's |fast| request is ignored.
+#endif
+  if (!fast)  // Slow path: hand the whole unwind to SlowUnwindStack().
+    return stack->SlowUnwindStack(pc, max_s);
+  stack->size = 0;
+  stack->trace[0] = pc;
+  if (max_s > 1) {  // Unwinding is only attempted for max_s > 1.
+    stack->max_size = max_s;
     if (!asan_inited) return;
     if (AsanThread *t = asanThreadRegistry().GetCurrent())
       stack->FastUnwindStack(pc, bp, t->stack_top(), t->stack_bottom());
-#endif
   }
 }
 
 #if !ASAN_ANDROID
-void ClearShadowMemoryForContext(void *context) {
+void ReadContextStack(void *context, uptr *stack, uptr *ssize) {
   ucontext_t *ucp = (ucontext_t*)context;
-  uptr sp = (uptr)ucp->uc_stack.ss_sp;
-  uptr size = ucp->uc_stack.ss_size;
-  // Align to page size.
-  uptr PageSize = GetPageSizeCached();
-  uptr bottom = sp & ~(PageSize - 1);
-  size += sp - bottom;
-  size = RoundUpTo(size, PageSize);
-  PoisonShadow(bottom, size, 0);
+  *stack = (uptr)ucp->uc_stack.ss_sp;
+  *ssize = ucp->uc_stack.ss_size;
 }
 #else
-void ClearShadowMemoryForContext(void *context) {
+void ReadContextStack(void *context, uptr *stack, uptr *ssize) {
   UNIMPLEMENTED();
 }
 #endif
diff --git a/lib/asan/asan_lock.h b/lib/asan/asan_lock.h
index edee49a..e69de29 100644
--- a/lib/asan/asan_lock.h
+++ b/lib/asan/asan_lock.h
@@ -1,42 +0,0 @@
-//===-- asan_lock.h ---------------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of AddressSanitizer, an address sanity checker.
-//
-// A wrapper for a simple lock.
-//===----------------------------------------------------------------------===//
-#ifndef ASAN_LOCK_H
-#define ASAN_LOCK_H
-
-#include "sanitizer_common/sanitizer_mutex.h"
-#include "asan_internal.h"
-
-// The locks in ASan are global objects and they are never destroyed to avoid
-// at-exit races (that is, a lock is being used by other threads while the main
-// thread is doing atexit destructors).
-// We define the class using opaque storage to avoid including system headers.
-
-namespace __asan {
-
-class AsanLock {
- public:
-  explicit AsanLock(LinkerInitialized);
-  void Lock();
-  void Unlock();
-  bool IsLocked() { return owner_ != 0; }
- private:
-  uptr opaque_storage_[10];
-  uptr owner_;  // for debugging and for malloc_introspection_t interface
-};
-
-typedef GenericScopedLock<AsanLock> ScopedLock;
-
-}  // namespace __asan
-
-#endif  // ASAN_LOCK_H
diff --git a/lib/asan/asan_mac.cc b/lib/asan/asan_mac.cc
index 7abe9a4..0f51a06 100644
--- a/lib/asan/asan_mac.cc
+++ b/lib/asan/asan_mac.cc
@@ -36,7 +36,6 @@
 #include <stdlib.h>  // for free()
 #include <unistd.h>
 #include <libkern/OSAtomic.h>
-#include <CoreFoundation/CFString.h>
 
 namespace __asan {
 
@@ -89,16 +88,52 @@
 void __asan_init();
 
 static const char kDyldInsertLibraries[] = "DYLD_INSERT_LIBRARIES";
+LowLevelAllocator allocator_for_env;
+
+// Change the value of the env var |name|, leaking the original value.
+// If |name_value| is NULL, the variable is deleted from the environment,
+// otherwise the corresponding "NAME=value" string is replaced with
+// |name_value|.
+void LeakyResetEnv(const char *name, const char *name_value) {
+  char ***env_ptr = _NSGetEnviron();
+  CHECK(env_ptr);
+  char **environ = *env_ptr;
+  CHECK(environ);
+  uptr name_len = internal_strlen(name);
+  while (*environ != 0) {
+    uptr len = internal_strlen(*environ);
+    if (len > name_len) {
+      const char *p = *environ;
+      if (!internal_memcmp(p, name, name_len) && p[name_len] == '=') {
+        if (name_value) {
+          // Match: replace the old "NAME=value" string with the new one.
+          *environ = const_cast<char*>(name_value);
+        } else {
+          // Match: shift the subsequent pointers (and the NULL) back.
+          char **del = environ;
+          do {
+            del[0] = del[1];
+          } while (*del++);
+          continue;  // Slot now holds the next entry or NULL: re-examine it.
+        }
+      }
+    }
+    environ++;
+  }
+}
 
 void MaybeReexec() {
   if (!flags()->allow_reexec) return;
-#if MAC_INTERPOSE_FUNCTIONS
-  // If the program is linked with the dynamic ASan runtime library, make sure
-  // the library is preloaded so that the wrappers work. If it is not, set
-  // DYLD_INSERT_LIBRARIES and re-exec ourselves.
+  // Make sure the dynamic ASan runtime library is preloaded so that the
+  // wrappers work. If it is not, set DYLD_INSERT_LIBRARIES and re-exec
+  // ourselves.
   Dl_info info;
   CHECK(dladdr((void*)((uptr)__asan_init), &info));
-  const char *dyld_insert_libraries = GetEnv(kDyldInsertLibraries);
+  char *dyld_insert_libraries =
+      const_cast<char*>(GetEnv(kDyldInsertLibraries));
+  uptr old_env_len = dyld_insert_libraries ?
+      internal_strlen(dyld_insert_libraries) : 0;
+  uptr fname_len = internal_strlen(info.dli_fname);
   if (!dyld_insert_libraries ||
       !REAL(strstr)(dyld_insert_libraries, info.dli_fname)) {
     // DYLD_INSERT_LIBRARIES is not set or does not contain the runtime
@@ -106,19 +141,80 @@
     char program_name[1024];
     uint32_t buf_size = sizeof(program_name);
     _NSGetExecutablePath(program_name, &buf_size);
-    // Ok to use setenv() since the wrappers don't depend on the value of
-    // asan_inited.
-    setenv(kDyldInsertLibraries, info.dli_fname, /*overwrite*/0);
+    char *new_env = const_cast<char*>(info.dli_fname);
+    if (dyld_insert_libraries) {
+      // Append the runtime dylib name to the existing value of
+      // DYLD_INSERT_LIBRARIES.
+      new_env = (char*)allocator_for_env.Allocate(old_env_len + fname_len + 2);
+      internal_strncpy(new_env, dyld_insert_libraries, old_env_len);
+      new_env[old_env_len] = ':';
+      // Copy fname_len and add a trailing zero.
+      internal_strncpy(new_env + old_env_len + 1, info.dli_fname,
+                       fname_len + 1);
+      // Ok to use setenv() since the wrappers don't depend on the value of
+      // asan_inited.
+      setenv(kDyldInsertLibraries, new_env, /*overwrite*/1);
+    } else {
+      // Set DYLD_INSERT_LIBRARIES equal to the runtime dylib name.
+      setenv(kDyldInsertLibraries, info.dli_fname, /*overwrite*/0);
+    }
     if (flags()->verbosity >= 1) {
       Report("exec()-ing the program with\n");
-      Report("%s=%s\n", kDyldInsertLibraries, info.dli_fname);
+      Report("%s=%s\n", kDyldInsertLibraries, new_env);
       Report("to enable ASan wrappers.\n");
       Report("Set ASAN_OPTIONS=allow_reexec=0 to disable this.\n");
     }
     execv(program_name, *_NSGetArgv());
+  } else {
+    // DYLD_INSERT_LIBRARIES is set and contains the runtime library.
+    if (old_env_len == fname_len) {
+      // It's just the runtime library name - fine to unset the variable.
+      LeakyResetEnv(kDyldInsertLibraries, NULL);
+    } else {
+      uptr env_name_len = internal_strlen(kDyldInsertLibraries);
+      // Allocate memory to hold the previous env var name, its value, the '='
+      // sign and the '\0' char.
+      char *new_env = (char*)allocator_for_env.Allocate(
+          old_env_len + 2 + env_name_len);
+      CHECK(new_env);
+      internal_memset(new_env, '\0', old_env_len + 2 + env_name_len);
+      internal_strncpy(new_env, kDyldInsertLibraries, env_name_len);
+      new_env[env_name_len] = '=';
+      char *new_env_pos = new_env + env_name_len + 1;
+
+      // Iterate over colon-separated pieces of |dyld_insert_libraries|.
+      char *piece_start = dyld_insert_libraries;
+      char *piece_end = NULL;
+      char *old_env_end = dyld_insert_libraries + old_env_len;
+      do {
+        if (piece_start[0] == ':') piece_start++;
+        piece_end = REAL(strchr)(piece_start, ':');
+        if (!piece_end) piece_end = old_env_end;
+        if ((uptr)(piece_start - dyld_insert_libraries) > old_env_len) break;
+        uptr piece_len = piece_end - piece_start;
+
+        // If the current piece isn't the runtime library name, append it to
+        // new_env. A dropped piece must not advance new_env_pos.
+        if ((piece_len != fname_len) ||
+            (internal_strncmp(piece_start, info.dli_fname, fname_len) != 0)) {
+          if (new_env_pos != new_env + env_name_len + 1) {
+            new_env_pos[0] = ':';
+            new_env_pos++;
+          }
+          internal_strncpy(new_env_pos, piece_start, piece_len);
+          new_env_pos += piece_len;
+        }
+        // Move on to the next piece.
+        piece_start = piece_end;
+      } while (piece_start < old_env_end);
+
+      // Can't use setenv() here, because it requires the allocator to be
+      // initialized.
+      // FIXME: instead of filtering DYLD_INSERT_LIBRARIES here, do it in
+      // a separate function called after InitializeAllocator().
+      LeakyResetEnv(kDyldInsertLibraries, new_env);
+    }
   }
-#endif  // MAC_INTERPOSE_FUNCTIONS
-  // If we're not using the dynamic runtime, do nothing.
 }
 
 // No-op. Mac does not support static linkage anyway.
@@ -131,36 +227,10 @@
 }
 
 void AsanPlatformThreadInit() {
-  // For the first program thread, we can't replace the allocator before
-  // __CFInitialize() has been called. If it hasn't, we'll call
-  // MaybeReplaceCFAllocator() later on this thread.
-  // For other threads __CFInitialize() has been called before their creation.
-  // See also asan_malloc_mac.cc.
-  if (((CFRuntimeBase*)kCFAllocatorSystemDefault)->_cfisa) {
-    MaybeReplaceCFAllocator();
-  }
 }
 
-AsanLock::AsanLock(LinkerInitialized) {
-  // We assume that OS_SPINLOCK_INIT is zero
-}
-
-void AsanLock::Lock() {
-  CHECK(sizeof(OSSpinLock) <= sizeof(opaque_storage_));
-  CHECK(OS_SPINLOCK_INIT == 0);
-  CHECK(owner_ != (uptr)pthread_self());
-  OSSpinLockLock((OSSpinLock*)&opaque_storage_);
-  CHECK(!owner_);
-  owner_ = (uptr)pthread_self();
-}
-
-void AsanLock::Unlock() {
-  CHECK(owner_ == (uptr)pthread_self());
-  owner_ = 0;
-  OSSpinLockUnlock((OSSpinLock*)&opaque_storage_);
-}
-
-void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp) {
+void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp, bool fast) {
+  (void)fast;
   stack->size = 0;
   stack->trace[0] = pc;
   if ((max_s) > 1) {
@@ -171,61 +241,10 @@
   }
 }
 
-void ClearShadowMemoryForContext(void *context) {
+void ReadContextStack(void *context, uptr *stack, uptr *ssize) {
   UNIMPLEMENTED();
 }
 
-// The range of pages to be used for escape islands.
-// TODO(glider): instead of mapping a fixed range we must find a range of
-// unmapped pages in vmmap and take them.
-// These constants were chosen empirically and may not work if the shadow
-// memory layout changes. Unfortunately they do necessarily depend on
-// kHighMemBeg or kHighMemEnd.
-static void *island_allocator_pos = 0;
-
-#if SANITIZER_WORDSIZE == 32
-# define kIslandEnd (0xffdf0000 - GetPageSizeCached())
-# define kIslandBeg (kIslandEnd - 256 * GetPageSizeCached())
-#else
-# define kIslandEnd (0x7fffffdf0000 - GetPageSizeCached())
-# define kIslandBeg (kIslandEnd - 256 * GetPageSizeCached())
-#endif
-
-extern "C"
-mach_error_t __interception_allocate_island(void **ptr,
-                                            uptr unused_size,
-                                            void *unused_hint) {
-  if (!island_allocator_pos) {
-    island_allocator_pos =
-        internal_mmap((void*)kIslandBeg, kIslandEnd - kIslandBeg,
-                      PROT_READ | PROT_WRITE | PROT_EXEC,
-                      MAP_PRIVATE | MAP_ANON | MAP_FIXED,
-                      -1, 0);
-    if (island_allocator_pos != (void*)kIslandBeg) {
-      return KERN_NO_SPACE;
-    }
-    if (flags()->verbosity) {
-      Report("Mapped pages %p--%p for branch islands.\n",
-             (void*)kIslandBeg, (void*)kIslandEnd);
-    }
-    // Should not be very performance-critical.
-    internal_memset(island_allocator_pos, 0xCC, kIslandEnd - kIslandBeg);
-  };
-  *ptr = island_allocator_pos;
-  island_allocator_pos = (char*)island_allocator_pos + GetPageSizeCached();
-  if (flags()->verbosity) {
-    Report("Branch island allocated at %p\n", *ptr);
-  }
-  return err_none;
-}
-
-extern "C"
-mach_error_t __interception_deallocate_island(void *ptr) {
-  // Do nothing.
-  // TODO(glider): allow to free and reuse the island memory.
-  return err_none;
-}
-
 // Support for the following functions from libdispatch on Mac OS:
 //   dispatch_async_f()
 //   dispatch_async()
@@ -255,9 +274,6 @@
 // The implementation details are at
 //   http://libdispatch.macosforge.org/trac/browser/trunk/src/queue.c
 
-typedef void* pthread_workqueue_t;
-typedef void* pthread_workitem_handle_t;
-
 typedef void* dispatch_group_t;
 typedef void* dispatch_queue_t;
 typedef void* dispatch_source_t;
@@ -272,27 +288,6 @@
   u32 parent_tid;
 } asan_block_context_t;
 
-// We use extern declarations of libdispatch functions here instead
-// of including <dispatch/dispatch.h>. This header is not present on
-// Mac OS X Leopard and eariler, and although we don't expect ASan to
-// work on legacy systems, it's bad to break the build of
-// LLVM compiler-rt there.
-extern "C" {
-void dispatch_async_f(dispatch_queue_t dq, void *ctxt,
-                      dispatch_function_t func);
-void dispatch_sync_f(dispatch_queue_t dq, void *ctxt,
-                     dispatch_function_t func);
-void dispatch_after_f(dispatch_time_t when, dispatch_queue_t dq, void *ctxt,
-                      dispatch_function_t func);
-void dispatch_barrier_async_f(dispatch_queue_t dq, void *ctxt,
-                              dispatch_function_t func);
-void dispatch_group_async_f(dispatch_group_t group, dispatch_queue_t dq,
-                            void *ctxt, dispatch_function_t func);
-int pthread_workqueue_additem_np(pthread_workqueue_t workq,
-    void *(*workitem_func)(void *), void * workitem_arg,
-    pthread_workitem_handle_t * itemhandlep, unsigned int *gencountp);
-}  // extern "C"
-
 static ALWAYS_INLINE
 void asan_register_worker_thread(int parent_tid, StackTrace *stack) {
   AsanThread *t = asanThreadRegistry().GetCurrent();
@@ -308,7 +303,7 @@
 // alloc_asan_context().
 extern "C"
 void asan_dispatch_call_block_and_release(void *block) {
-  GET_STACK_TRACE_HERE(kStackTraceMax);
+  GET_STACK_TRACE_THREAD;
   asan_block_context_t *context = (asan_block_context_t*)block;
   if (flags()->verbosity >= 2) {
     Report("asan_dispatch_call_block_and_release(): "
@@ -318,7 +313,7 @@
   asan_register_worker_thread(context->parent_tid, &stack);
   // Call the original dispatcher for the block.
   context->func(context->block);
-  asan_free(context, &stack);
+  asan_free(context, &stack, FROM_MALLOC);
 }
 
 }  // namespace __asan
@@ -343,7 +338,7 @@
 #define INTERCEPT_DISPATCH_X_F_3(dispatch_x_f)                                \
   INTERCEPTOR(void, dispatch_x_f, dispatch_queue_t dq, void *ctxt,            \
                                   dispatch_function_t func) {                 \
-    GET_STACK_TRACE_HERE(kStackTraceMax);                                     \
+    GET_STACK_TRACE_THREAD;                                                   \
     asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack); \
     if (flags()->verbosity >= 2) {                                            \
       Report(#dispatch_x_f "(): context: %p, pthread_self: %p\n",             \
@@ -361,7 +356,7 @@
 INTERCEPTOR(void, dispatch_after_f, dispatch_time_t when,
                                     dispatch_queue_t dq, void *ctxt,
                                     dispatch_function_t func) {
-  GET_STACK_TRACE_HERE(kStackTraceMax);
+  GET_STACK_TRACE_THREAD;
   asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack);
   if (flags()->verbosity >= 2) {
     Report("dispatch_after_f: %p\n", asan_ctxt);
@@ -374,7 +369,7 @@
 INTERCEPTOR(void, dispatch_group_async_f, dispatch_group_t group,
                                           dispatch_queue_t dq, void *ctxt,
                                           dispatch_function_t func) {
-  GET_STACK_TRACE_HERE(kStackTraceMax);
+  GET_STACK_TRACE_THREAD;
   asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack);
   if (flags()->verbosity >= 2) {
     Report("dispatch_group_async_f(): context: %p, pthread_self: %p\n",
@@ -385,14 +380,7 @@
                                asan_dispatch_call_block_and_release);
 }
 
-#if MAC_INTERPOSE_FUNCTIONS && !defined(MISSING_BLOCKS_SUPPORT)
-// dispatch_async, dispatch_group_async and others tailcall the corresponding
-// dispatch_*_f functions. When wrapping functions with mach_override, those
-// dispatch_*_f are intercepted automatically. But with dylib interposition
-// this does not work, because the calls within the same library are not
-// interposed.
-// Therefore we need to re-implement dispatch_async and friends.
-
+#if !defined(MISSING_BLOCKS_SUPPORT)
 extern "C" {
 // FIXME: consolidate these declarations with asan_intercepted_functions.h.
 void dispatch_async(dispatch_queue_t dq, void(^work)(void));
@@ -409,7 +397,7 @@
   void (^asan_block)(void);  \
   int parent_tid = asanThreadRegistry().GetCurrentTidOrInvalid(); \
   asan_block = ^(void) { \
-    GET_STACK_TRACE_HERE(kStackTraceMax); \
+    GET_STACK_TRACE_THREAD; \
     asan_register_worker_thread(parent_tid, &stack); \
     work(); \
   }
@@ -445,96 +433,4 @@
 }
 #endif
 
-// The following stuff has been extremely helpful while looking for the
-// unhandled functions that spawned jobs on Chromium shutdown. If the verbosity
-// level is 2 or greater, we wrap pthread_workqueue_additem_np() in order to
-// find the points of worker thread creation (each of such threads may be used
-// to run several tasks, that's why this is not enough to support the whole
-// libdispatch API.
-extern "C"
-void *wrap_workitem_func(void *arg) {
-  if (flags()->verbosity >= 2) {
-    Report("wrap_workitem_func: %p, pthread_self: %p\n", arg, pthread_self());
-  }
-  asan_block_context_t *ctxt = (asan_block_context_t*)arg;
-  worker_t fn = (worker_t)(ctxt->func);
-  void *result =  fn(ctxt->block);
-  GET_STACK_TRACE_HERE(kStackTraceMax);
-  asan_free(arg, &stack);
-  return result;
-}
-
-INTERCEPTOR(int, pthread_workqueue_additem_np, pthread_workqueue_t workq,
-    void *(*workitem_func)(void *), void * workitem_arg,
-    pthread_workitem_handle_t * itemhandlep, unsigned int *gencountp) {
-  GET_STACK_TRACE_HERE(kStackTraceMax);
-  asan_block_context_t *asan_ctxt =
-      (asan_block_context_t*) asan_malloc(sizeof(asan_block_context_t), &stack);
-  asan_ctxt->block = workitem_arg;
-  asan_ctxt->func = (dispatch_function_t)workitem_func;
-  asan_ctxt->parent_tid = asanThreadRegistry().GetCurrentTidOrInvalid();
-  if (flags()->verbosity >= 2) {
-    Report("pthread_workqueue_additem_np: %p\n", asan_ctxt);
-    PRINT_CURRENT_STACK();
-  }
-  return REAL(pthread_workqueue_additem_np)(workq, wrap_workitem_func,
-                                            asan_ctxt, itemhandlep,
-                                            gencountp);
-}
-
-// See http://opensource.apple.com/source/CF/CF-635.15/CFString.c
-int __CFStrIsConstant(CFStringRef str) {
-  CFRuntimeBase *base = (CFRuntimeBase*)str;
-#if __LP64__
-  return base->_rc == 0;
-#else
-  return (base->_cfinfo[CF_RC_BITS]) == 0;
-#endif
-}
-
-INTERCEPTOR(CFStringRef, CFStringCreateCopy, CFAllocatorRef alloc,
-                                             CFStringRef str) {
-  if (__CFStrIsConstant(str)) {
-    return str;
-  } else {
-    return REAL(CFStringCreateCopy)(alloc, str);
-  }
-}
-
-DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
-
-DECLARE_REAL_AND_INTERCEPTOR(void, __CFInitialize, void)
-
-namespace __asan {
-
-void InitializeMacInterceptors() {
-  CHECK(INTERCEPT_FUNCTION(dispatch_async_f));
-  CHECK(INTERCEPT_FUNCTION(dispatch_sync_f));
-  CHECK(INTERCEPT_FUNCTION(dispatch_after_f));
-  CHECK(INTERCEPT_FUNCTION(dispatch_barrier_async_f));
-  CHECK(INTERCEPT_FUNCTION(dispatch_group_async_f));
-  // We don't need to intercept pthread_workqueue_additem_np() to support the
-  // libdispatch API, but it helps us to debug the unsupported functions. Let's
-  // intercept it only during verbose runs.
-  if (flags()->verbosity >= 2) {
-    CHECK(INTERCEPT_FUNCTION(pthread_workqueue_additem_np));
-  }
-  // Normally CFStringCreateCopy should not copy constant CF strings.
-  // Replacing the default CFAllocator causes constant strings to be copied
-  // rather than just returned, which leads to bugs in big applications like
-  // Chromium and WebKit, see
-  // http://code.google.com/p/address-sanitizer/issues/detail?id=10
-  // Until this problem is fixed we need to check that the string is
-  // non-constant before calling CFStringCreateCopy.
-  CHECK(INTERCEPT_FUNCTION(CFStringCreateCopy));
-  // Some of the library functions call free() directly, so we have to
-  // intercept it.
-  CHECK(INTERCEPT_FUNCTION(free));
-  if (flags()->replace_cfallocator) {
-    CHECK(INTERCEPT_FUNCTION(__CFInitialize));
-  }
-}
-
-}  // namespace __asan
-
 #endif  // __APPLE__
diff --git a/lib/asan/asan_malloc_linux.cc b/lib/asan/asan_malloc_linux.cc
index 5eb23dd..c30c5db 100644
--- a/lib/asan/asan_malloc_linux.cc
+++ b/lib/asan/asan_malloc_linux.cc
@@ -19,6 +19,7 @@
 #include "asan_interceptors.h"
 #include "asan_internal.h"
 #include "asan_stack.h"
+#include "asan_thread_registry.h"
 
 #if ASAN_ANDROID
 DECLARE_REAL_AND_INTERCEPTOR(void*, malloc, uptr size)
@@ -59,17 +60,17 @@
 using namespace __asan;  // NOLINT
 
 INTERCEPTOR(void, free, void *ptr) {
-  GET_STACK_TRACE_HERE_FOR_FREE(ptr);
-  asan_free(ptr, &stack);
+  GET_STACK_TRACE_FREE;
+  asan_free(ptr, &stack, FROM_MALLOC);
 }
 
 INTERCEPTOR(void, cfree, void *ptr) {
-  GET_STACK_TRACE_HERE_FOR_FREE(ptr);
-  asan_free(ptr, &stack);
+  GET_STACK_TRACE_FREE;
+  asan_free(ptr, &stack, FROM_MALLOC);
 }
 
 INTERCEPTOR(void*, malloc, uptr size) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_malloc(size, &stack);
 }
 
@@ -85,25 +86,25 @@
     CHECK(allocated < kCallocPoolSize);
     return mem;
   }
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_calloc(nmemb, size, &stack);
 }
 
 INTERCEPTOR(void*, realloc, void *ptr, uptr size) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_realloc(ptr, size, &stack);
 }
 
 INTERCEPTOR(void*, memalign, uptr boundary, uptr size) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
-  return asan_memalign(boundary, size, &stack);
+  GET_STACK_TRACE_MALLOC;
+  return asan_memalign(boundary, size, &stack, FROM_MALLOC);
 }
 
 INTERCEPTOR(void*, __libc_memalign, uptr align, uptr s)
   ALIAS("memalign");
 
 INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_malloc_usable_size(ptr, &stack);
 }
 
@@ -126,19 +127,23 @@
 }
 
 INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   // Printf("posix_memalign: %zx %zu\n", alignment, size);
   return asan_posix_memalign(memptr, alignment, size, &stack);
 }
 
 INTERCEPTOR(void*, valloc, uptr size) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_valloc(size, &stack);
 }
 
 INTERCEPTOR(void*, pvalloc, uptr size) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_pvalloc(size, &stack);
 }
 
+INTERCEPTOR(void, malloc_stats, void) {
+  __asan_print_accumulated_stats();  // ASan stats only; REAL is not called.
+}
+
 #endif  // __linux__
diff --git a/lib/asan/asan_malloc_mac.cc b/lib/asan/asan_malloc_mac.cc
index b32c18e..b8ec90e 100644
--- a/lib/asan/asan_malloc_mac.cc
+++ b/lib/asan/asan_malloc_mac.cc
@@ -36,85 +36,108 @@
 
 // TODO(glider): do we need both zones?
 static malloc_zone_t *system_malloc_zone = 0;
-static malloc_zone_t *system_purgeable_zone = 0;
 static malloc_zone_t asan_zone;
-CFAllocatorRef cf_asan = 0;
 
-// _CFRuntimeCreateInstance() checks whether the supplied allocator is
-// kCFAllocatorSystemDefault and, if it is not, stores the allocator reference
-// at the beginning of the allocated memory and returns the pointer to the
-// allocated memory plus sizeof(CFAllocatorRef). See
-// http://www.opensource.apple.com/source/CF/CF-635.21/CFRuntime.c
-// Pointers returned by _CFRuntimeCreateInstance() can then be passed directly
-// to free() or CFAllocatorDeallocate(), which leads to false invalid free
-// reports.
-// The corresponding rdar bug is http://openradar.appspot.com/radar?id=1796404.
-void* ALWAYS_INLINE get_saved_cfallocator_ref(void *ptr) {
-  if (flags()->replace_cfallocator) {
-    // Make sure we're not hitting the previous page. This may be incorrect
-    // if ASan's malloc returns an address ending with 0xFF8, which will be
-    // then padded to a page boundary with a CFAllocatorRef.
-    uptr arith_ptr = (uptr)ptr;
-    if ((arith_ptr & 0xFFF) > sizeof(CFAllocatorRef)) {
-      CFAllocatorRef *saved =
-          (CFAllocatorRef*)(arith_ptr - sizeof(CFAllocatorRef));
-      if ((*saved == cf_asan) && asan_mz_size(saved)) ptr = (void*)saved;
-    }
-  }
-  return ptr;
+INTERCEPTOR(malloc_zone_t *, malloc_create_zone,
+                             vm_size_t start_size, unsigned zone_flags) {
+  if (!asan_inited) __asan_init();
+  GET_STACK_TRACE_MALLOC;
+  malloc_zone_t *new_zone =
+      (malloc_zone_t*)asan_malloc(sizeof(asan_zone), &stack);
+  internal_memcpy(new_zone, &asan_zone, sizeof(asan_zone));
+  new_zone->zone_name = NULL;  // The name will be changed anyway.
+  return new_zone;
 }
 
-// The free() implementation provided by OS X calls malloc_zone_from_ptr()
-// to find the owner of |ptr|. If the result is 0, an invalid free() is
-// reported. Our implementation falls back to asan_free() in this case
-// in order to print an ASan-style report.
-//
-// For the objects created by _CFRuntimeCreateInstance a CFAllocatorRef is
-// placed at the beginning of the allocated chunk and the pointer returned by
-// our allocator is off by sizeof(CFAllocatorRef). This pointer can be then
-// passed directly to free(), which will lead to errors.
-// To overcome this we're checking whether |ptr-sizeof(CFAllocatorRef)|
-// contains a pointer to our CFAllocator (assuming no other allocator is used).
-// See http://code.google.com/p/address-sanitizer/issues/detail?id=70 for more
-// info.
+INTERCEPTOR(malloc_zone_t *, malloc_default_zone, void) {
+  if (!asan_inited) __asan_init();
+  return &asan_zone;
+}
+
+INTERCEPTOR(malloc_zone_t *, malloc_default_purgeable_zone, void) {
+  // FIXME: ASan should support purgeable allocations.
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=139
+  if (!asan_inited) __asan_init();
+  return &asan_zone;
+}
+
+INTERCEPTOR(void, malloc_make_purgeable, void *ptr) {
+  // FIXME: ASan should support purgeable allocations. Ignoring them is fine
+  // for now.
+  if (!asan_inited) __asan_init();
+}
+
+INTERCEPTOR(int, malloc_make_nonpurgeable, void *ptr) {
+  // FIXME: ASan should support purgeable allocations. Ignoring them is fine
+  // for now.
+  if (!asan_inited) __asan_init();
+  // Must return 0 if the contents were not purged since the last call to
+  // malloc_make_purgeable().
+  return 0;
+}
+
+INTERCEPTOR(void, malloc_set_zone_name, malloc_zone_t *zone, const char *name) {
+  if (!asan_inited) __asan_init();
+  // Allocate |strlen("asan-") + 1 + internal_strlen(name)| bytes.
+  size_t buflen = 6 + (name ? internal_strlen(name) : 0);
+  InternalScopedBuffer<char> new_name(buflen);
+  if (name && zone->introspect == asan_zone.introspect) {
+    internal_snprintf(new_name.data(), buflen, "asan-%s", name);
+    name = new_name.data();
+  }
+
+  // Call the system malloc's implementation for both external and our zones,
+  // since that appropriately changes VM region protections on the zone.
+  REAL(malloc_set_zone_name)(zone, name);
+}
+
+INTERCEPTOR(void *, malloc, size_t size) {
+  if (!asan_inited) __asan_init();
+  GET_STACK_TRACE_MALLOC;
+  void *res = asan_malloc(size, &stack);
+  return res;
+}
+
 INTERCEPTOR(void, free, void *ptr) {
-  malloc_zone_t *zone = malloc_zone_from_ptr(ptr);
-  if (zone) {
-#if defined(MAC_OS_X_VERSION_10_6) && \
-    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
-    if ((zone->version >= 6) && (zone->free_definite_size)) {
-      zone->free_definite_size(zone, ptr, malloc_size(ptr));
-    } else {
-      malloc_zone_free(zone, ptr);
-    }
-#else
-    malloc_zone_free(zone, ptr);
-#endif
-  } else {
-    if (!asan_mz_size(ptr)) ptr = get_saved_cfallocator_ref(ptr);
-    GET_STACK_TRACE_HERE_FOR_FREE(ptr);
-    asan_free(ptr, &stack);
-  }
+  if (!asan_inited) __asan_init();
+  if (!ptr) return;
+  GET_STACK_TRACE_FREE;
+  asan_free(ptr, &stack, FROM_MALLOC);
 }
 
-// We can't always replace the default CFAllocator with cf_asan right in
-// ReplaceSystemMalloc(), because it is sometimes called before
-// __CFInitialize(), when the default allocator is invalid and replacing it may
-// crash the program. Instead we wait for the allocator to initialize and jump
-// in just after __CFInitialize(). Nobody is going to allocate memory using
-// CFAllocators before that, so we won't miss anything.
-//
-// See http://code.google.com/p/address-sanitizer/issues/detail?id=87
-// and http://opensource.apple.com/source/CF/CF-550.43/CFRuntime.c
-INTERCEPTOR(void, __CFInitialize, void) {
-  // If the runtime is built as dynamic library, __CFInitialize wrapper may be
-  // called before __asan_init.
-#if !MAC_INTERPOSE_FUNCTIONS
-  CHECK(flags()->replace_cfallocator);
-  CHECK(asan_inited);
-#endif
-  REAL(__CFInitialize)();
-  if (!cf_asan && asan_inited) MaybeReplaceCFAllocator();
+INTERCEPTOR(void *, realloc, void *ptr, size_t size) {
+  if (!asan_inited) __asan_init();
+  GET_STACK_TRACE_MALLOC;
+  return asan_realloc(ptr, size, &stack);
+}
+
+INTERCEPTOR(void *, calloc, size_t nmemb, size_t size) {
+  if (!asan_inited) __asan_init();
+  GET_STACK_TRACE_MALLOC;
+  return asan_calloc(nmemb, size, &stack);
+}
+
+INTERCEPTOR(void *, valloc, size_t size) {
+  if (!asan_inited) __asan_init();
+  GET_STACK_TRACE_MALLOC;
+  return asan_memalign(GetPageSizeCached(), size, &stack, FROM_MALLOC);
+}
+
+INTERCEPTOR(size_t, malloc_good_size, size_t size) {
+  if (!asan_inited) __asan_init();
+  return asan_zone.introspect->good_size(&asan_zone, size);
+}
+
+INTERCEPTOR(int, posix_memalign, void **memptr, size_t alignment, size_t size) {
+  if (!asan_inited) __asan_init();  // Make sure the runtime is initialized.
+  CHECK(memptr);  // The output pointer is mandatory.
+  GET_STACK_TRACE_MALLOC;
+  void *result = asan_memalign(alignment, size, &stack, FROM_MALLOC);
+  if (result) {
+    *memptr = result;  // |*memptr| is written only on success.
+    return 0;
+  }
+  return -1;  // NOTE(review): POSIX wants an errno value here, not -1.
 }
 
 namespace {
@@ -130,16 +153,7 @@
     CHECK(system_malloc_zone);
     return malloc_zone_malloc(system_malloc_zone, size);
   }
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
-  return asan_malloc(size, &stack);
-}
-
-void *cf_malloc(CFIndex size, CFOptionFlags hint, void *info) {
-  if (!asan_inited) {
-    CHECK(system_malloc_zone);
-    return malloc_zone_malloc(system_malloc_zone, size);
-  }
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_malloc(size, &stack);
 }
 
@@ -155,7 +169,7 @@
     CHECK(allocated < kCallocPoolSize);
     return mem;
   }
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_calloc(nmemb, size, &stack);
 }
 
@@ -164,8 +178,8 @@
     CHECK(system_malloc_zone);
     return malloc_zone_valloc(system_malloc_zone, size);
   }
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
-  return asan_memalign(GetPageSizeCached(), size, &stack);
+  GET_STACK_TRACE_MALLOC;
+  return asan_memalign(GetPageSizeCached(), size, &stack, FROM_MALLOC);
 }
 
 #define GET_ZONE_FOR_PTR(ptr) \
@@ -174,31 +188,14 @@
 
 void ALWAYS_INLINE free_common(void *context, void *ptr) {
   if (!ptr) return;
-  if (asan_mz_size(ptr)) {
-    GET_STACK_TRACE_HERE_FOR_FREE(ptr);
-    asan_free(ptr, &stack);
+  GET_STACK_TRACE_FREE;
+  // FIXME: need to retire this flag.
+  if (!flags()->mac_ignore_invalid_free) {
+    asan_free(ptr, &stack, FROM_MALLOC);
   } else {
-    // If the pointer does not belong to any of the zones, use one of the
-    // fallback methods to free memory.
-    malloc_zone_t *zone_ptr = malloc_zone_from_ptr(ptr);
-    if (zone_ptr == system_purgeable_zone) {
-      // allocations from malloc_default_purgeable_zone() done before
-      // __asan_init() may be occasionally freed via free_common().
-      // see http://code.google.com/p/address-sanitizer/issues/detail?id=99.
-      malloc_zone_free(zone_ptr, ptr);
-    } else {
-      // If the memory chunk pointer was moved to store additional
-      // CFAllocatorRef, fix it back.
-      ptr = get_saved_cfallocator_ref(ptr);
-      GET_STACK_TRACE_HERE_FOR_FREE(ptr);
-      if (!flags()->mac_ignore_invalid_free) {
-        asan_free(ptr, &stack);
-      } else {
-        GET_ZONE_FOR_PTR(ptr);
-        WarnMacFreeUnallocated((uptr)ptr, (uptr)zone_ptr, zone_name, &stack);
-        return;
-      }
-    }
+    GET_ZONE_FOR_PTR(ptr);
+    WarnMacFreeUnallocated((uptr)ptr, (uptr)zone_ptr, zone_name, &stack);
+    return;
   }
 }
 
@@ -207,52 +204,30 @@
   free_common(zone, ptr);
 }
 
-void cf_free(void *ptr, void *info) {
-  free_common(info, ptr);
-}
-
 void *mz_realloc(malloc_zone_t *zone, void *ptr, size_t size) {
   if (!ptr) {
-    GET_STACK_TRACE_HERE_FOR_MALLOC;
+    GET_STACK_TRACE_MALLOC;
     return asan_malloc(size, &stack);
   } else {
     if (asan_mz_size(ptr)) {
-      GET_STACK_TRACE_HERE_FOR_MALLOC;
+      GET_STACK_TRACE_MALLOC;
       return asan_realloc(ptr, size, &stack);
     } else {
       // We can't recover from reallocating an unknown address, because
       // this would require reading at most |size| bytes from
       // potentially unaccessible memory.
-      GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+      GET_STACK_TRACE_FREE;
       GET_ZONE_FOR_PTR(ptr);
       ReportMacMzReallocUnknown((uptr)ptr, (uptr)zone_ptr, zone_name, &stack);
     }
   }
 }
 
-void *cf_realloc(void *ptr, CFIndex size, CFOptionFlags hint, void *info) {
-  if (!ptr) {
-    GET_STACK_TRACE_HERE_FOR_MALLOC;
-    return asan_malloc(size, &stack);
-  } else {
-    if (asan_mz_size(ptr)) {
-      GET_STACK_TRACE_HERE_FOR_MALLOC;
-      return asan_realloc(ptr, size, &stack);
-    } else {
-      // We can't recover from reallocating an unknown address, because
-      // this would require reading at most |size| bytes from
-      // potentially unaccessible memory.
-      GET_STACK_TRACE_HERE_FOR_FREE(ptr);
-      GET_ZONE_FOR_PTR(ptr);
-      ReportMacCfReallocUnknown((uptr)ptr, (uptr)zone_ptr, zone_name, &stack);
-    }
-  }
-}
-
 void mz_destroy(malloc_zone_t* zone) {
   // A no-op -- we will not be destroyed!
-  Printf("mz_destroy() called -- ignoring\n");
+  Report("mz_destroy() called -- ignoring\n");
 }
+
   // from AvailabilityMacros.h
 #if defined(MAC_OS_X_VERSION_10_6) && \
     MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
@@ -261,8 +236,8 @@
     CHECK(system_malloc_zone);
     return malloc_zone_memalign(system_malloc_zone, align, size);
   }
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
-  return asan_memalign(align, size, &stack);
+  GET_STACK_TRACE_MALLOC;
+  return asan_memalign(align, size, &stack, FROM_MALLOC);
 }
 
 // This function is currently unused, and we build with -Werror.
@@ -324,23 +299,7 @@
 
 }  // unnamed namespace
 
-extern int __CFRuntimeClassTableSize;
-
 namespace __asan {
-void MaybeReplaceCFAllocator() {
-  static CFAllocatorContext asan_context = {
-        /*version*/ 0, /*info*/ &asan_zone,
-        /*retain*/ 0, /*release*/ 0,
-        /*copyDescription*/0,
-        /*allocate*/ &cf_malloc,
-        /*reallocate*/ &cf_realloc,
-        /*deallocate*/ &cf_free,
-        /*preferredSize*/ 0 };
-  if (!cf_asan)
-    cf_asan = CFAllocatorCreate(kCFAllocatorUseContext, &asan_context);
-  if (flags()->replace_cfallocator && CFAllocatorGetDefault() != cf_asan)
-    CFAllocatorSetDefault(cf_asan);
-}
 
 void ReplaceSystemMalloc() {
   static malloc_introspection_t asan_introspection;
@@ -380,41 +339,10 @@
   asan_zone.free_definite_size = 0;
   asan_zone.memalign = &mz_memalign;
   asan_introspection.zone_locked = &mi_zone_locked;
-
-  // Request the default purgable zone to force its creation. The
-  // current default zone is registered with the purgable zone for
-  // doing tiny and small allocs.  Sadly, it assumes that the default
-  // zone is the szone implementation from OS X and will crash if it
-  // isn't.  By creating the zone now, this will be true and changing
-  // the default zone won't cause a problem.  (OS X 10.6 and higher.)
-  system_purgeable_zone = malloc_default_purgeable_zone();
 #endif
 
-  // Register the ASan zone. At this point, it will not be the
-  // default zone.
+  // Register the ASan zone.
   malloc_zone_register(&asan_zone);
-
-  // Unregister and reregister the default zone.  Unregistering swaps
-  // the specified zone with the last one registered which for the
-  // default zone makes the more recently registered zone the default
-  // zone.  The default zone is then re-registered to ensure that
-  // allocations made from it earlier will be handled correctly.
-  // Things are not guaranteed to work that way, but it's how they work now.
-  system_malloc_zone = malloc_default_zone();
-  malloc_zone_unregister(system_malloc_zone);
-  malloc_zone_register(system_malloc_zone);
-  // Make sure the default allocator was replaced.
-  CHECK(malloc_default_zone() == &asan_zone);
-
-  // If __CFInitialize() hasn't been called yet, cf_asan will be created and
-  // installed as the default allocator after __CFInitialize() finishes (see
-  // the interceptor for __CFInitialize() above). Otherwise install cf_asan
-  // right now. On both Snow Leopard and Lion __CFInitialize() calls
-  // __CFAllocatorInitialize(), which initializes the _base._cfisa field of
-  // the default allocators we check here.
-  if (((CFRuntimeBase*)kCFAllocatorSystemDefault)->_cfisa) {
-    MaybeReplaceCFAllocator();
-  }
 }
 }  // namespace __asan
 
diff --git a/lib/asan/asan_malloc_win.cc b/lib/asan/asan_malloc_win.cc
index 3ec76d8..9fcfea5 100644
--- a/lib/asan/asan_malloc_win.cc
+++ b/lib/asan/asan_malloc_win.cc
@@ -31,8 +31,8 @@
 
 extern "C" {
 void free(void *ptr) {
-  GET_STACK_TRACE_HERE_FOR_FREE(ptr);
-  return asan_free(ptr, &stack);
+  GET_STACK_TRACE_FREE;
+  return asan_free(ptr, &stack, FROM_MALLOC);
 }
 
 void _free_dbg(void* ptr, int) {
@@ -44,7 +44,7 @@
 }
 
 void *malloc(size_t size) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_malloc(size, &stack);
 }
 
@@ -53,7 +53,7 @@
 }
 
 void *calloc(size_t nmemb, size_t size) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_calloc(nmemb, size, &stack);
 }
 
@@ -66,7 +66,7 @@
 }
 
 void *realloc(void *ptr, size_t size) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_realloc(ptr, size, &stack);
 }
 
@@ -85,7 +85,7 @@
 }
 
 size_t _msize(void *ptr) {
-  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  GET_STACK_TRACE_MALLOC;
   return asan_malloc_usable_size(ptr, &stack);
 }
 
diff --git a/lib/asan/asan_mapping.h b/lib/asan/asan_mapping.h
index 3a5f88b..161ab65 100644
--- a/lib/asan/asan_mapping.h
+++ b/lib/asan/asan_mapping.h
@@ -18,10 +18,41 @@
 
 // The full explanation of the memory mapping could be found here:
 // http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm
+//
+// Typical shadow mapping on Linux/x86_64 with SHADOW_OFFSET == 0x00007fff8000:
+// || `[0x10007fff8000, 0x7fffffffffff]` || HighMem    ||
+// || `[0x02008fff7000, 0x10007fff7fff]` || HighShadow ||
+// || `[0x00008fff7000, 0x02008fff6fff]` || ShadowGap  ||
+// || `[0x00007fff8000, 0x00008fff6fff]` || LowShadow  ||
+// || `[0x000000000000, 0x00007fff7fff]` || LowMem     ||
+//
+// When SHADOW_OFFSET is zero (-pie):
+// || `[0x100000000000, 0x7fffffffffff]` || HighMem    ||
+// || `[0x020000000000, 0x0fffffffffff]` || HighShadow ||
+// || `[0x000000040000, 0x01ffffffffff]` || ShadowGap  ||
+//
+// Special case when something is already mapped between
+// 0x003000000000 and 0x005000000000 (e.g. when prelink is installed):
+// || `[0x10007fff8000, 0x7fffffffffff]` || HighMem    ||
+// || `[0x02008fff7000, 0x10007fff7fff]` || HighShadow ||
+// || `[0x005000000000, 0x02008fff6fff]` || ShadowGap3 ||
+// || `[0x003000000000, 0x004fffffffff]` || MidMem     ||
+// || `[0x000a7fff8000, 0x002fffffffff]` || ShadowGap2 ||
+// || `[0x00067fff8000, 0x000a7fff7fff]` || MidShadow  ||
+// || `[0x00008fff7000, 0x00067fff7fff]` || ShadowGap  ||
+// || `[0x00007fff8000, 0x00008fff6fff]` || LowShadow  ||
+// || `[0x000000000000, 0x00007fff7fff]` || LowMem     ||
+//
+// Default Linux/i386 mapping:
+// || `[0x40000000, 0xffffffff]` || HighMem    ||
+// || `[0x28000000, 0x3fffffff]` || HighShadow ||
+// || `[0x24000000, 0x27ffffff]` || ShadowGap  ||
+// || `[0x20000000, 0x23ffffff]` || LowShadow  ||
+// || `[0x00000000, 0x1fffffff]` || LowMem     ||
 
 #if ASAN_FLEXIBLE_MAPPING_AND_OFFSET == 1
-extern __attribute__((visibility("default"))) uptr __asan_mapping_scale;
-extern __attribute__((visibility("default"))) uptr __asan_mapping_offset;
+extern SANITIZER_INTERFACE_ATTRIBUTE uptr __asan_mapping_scale;
+extern SANITIZER_INTERFACE_ATTRIBUTE uptr __asan_mapping_offset;
 # define SHADOW_SCALE (__asan_mapping_scale)
 # define SHADOW_OFFSET (__asan_mapping_offset)
 #else
@@ -36,27 +67,20 @@
 #   if defined(__powerpc64__)
 #    define SHADOW_OFFSET (1ULL << 41)
 #   else
-#    define SHADOW_OFFSET (1ULL << 44)
+#    if ASAN_MAC
+#     define SHADOW_OFFSET (1ULL << 44)
+#    else
+#     define SHADOW_OFFSET 0x7fff8000ULL
+#    endif
 #   endif
 #  endif
 # endif
 #endif  // ASAN_FLEXIBLE_MAPPING_AND_OFFSET
 
 #define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
-#define MEM_TO_SHADOW(mem) (((mem) >> SHADOW_SCALE) | (SHADOW_OFFSET))
+#define MEM_TO_SHADOW(mem) (((mem) >> SHADOW_SCALE) + (SHADOW_OFFSET))
 #define SHADOW_TO_MEM(shadow) (((shadow) - SHADOW_OFFSET) << SHADOW_SCALE)
 
-#if SANITIZER_WORDSIZE == 64
-# if defined(__powerpc64__)
-  static const uptr kHighMemEnd = 0x00000fffffffffffUL;
-# else
-  static const uptr kHighMemEnd = 0x00007fffffffffffUL;
-# endif
-#else  // SANITIZER_WORDSIZE == 32
-  static const uptr kHighMemEnd = 0xffffffff;
-#endif  // SANITIZER_WORDSIZE
-
-
 #define kLowMemBeg      0
 #define kLowMemEnd      (SHADOW_OFFSET ? SHADOW_OFFSET - 1 : 0)
 
@@ -68,59 +92,121 @@
 #define kHighShadowBeg  MEM_TO_SHADOW(kHighMemBeg)
 #define kHighShadowEnd  MEM_TO_SHADOW(kHighMemEnd)
 
+# define kMidShadowBeg MEM_TO_SHADOW(kMidMemBeg)
+# define kMidShadowEnd MEM_TO_SHADOW(kMidMemEnd)
+
 // With the zero shadow base we can not actually map pages starting from 0.
 // This constant is somewhat arbitrary.
 #define kZeroBaseShadowStart (1 << 18)
 
 #define kShadowGapBeg   (kLowShadowEnd ? kLowShadowEnd + 1 \
                                        : kZeroBaseShadowStart)
-#define kShadowGapEnd   (kHighShadowBeg - 1)
+#define kShadowGapEnd   ((kMidMemBeg ? kMidShadowBeg : kHighShadowBeg) - 1)
 
-#define kGlobalAndStackRedzone \
-      (SHADOW_GRANULARITY < 32 ? 32 : SHADOW_GRANULARITY)
+#define kShadowGap2Beg (kMidMemBeg ? kMidShadowEnd + 1 : 0)
+#define kShadowGap2End (kMidMemBeg ? kMidMemBeg - 1 : 0)
+
+#define kShadowGap3Beg (kMidMemBeg ? kMidMemEnd + 1 : 0)
+#define kShadowGap3End (kMidMemBeg ? kHighShadowBeg - 1 : 0)
+
+#define DO_ASAN_MAPPING_PROFILE 0  // Set to 1 to profile the functions below.
+
+#if DO_ASAN_MAPPING_PROFILE
+# define PROFILE_ASAN_MAPPING() AsanMappingProfile[__LINE__]++;
+#else
+# define PROFILE_ASAN_MAPPING()
+#endif
+
+// If 1, all shadow boundaries are constants.
+// Don't set to 1 other than for testing.
+#define ASAN_FIXED_MAPPING 0
 
 namespace __asan {
 
+extern uptr AsanMappingProfile[];
+
+#if ASAN_FIXED_MAPPING
+// Fixed mapping for 64-bit Linux. Mostly used for performance comparison
+// with non-fixed mapping. As of r175253 (Feb 2013) the performance
+// difference between fixed and non-fixed mapping is below the noise level.
+static uptr kHighMemEnd = 0x7fffffffffffULL;
+static uptr kMidMemBeg =    0x3000000000ULL;
+static uptr kMidMemEnd =    0x4fffffffffULL;
+#else
+SANITIZER_INTERFACE_ATTRIBUTE
+extern uptr kHighMemEnd, kMidMemBeg, kMidMemEnd;  // Initialized in __asan_init.
+#endif
+
 static inline bool AddrIsInLowMem(uptr a) {
+  PROFILE_ASAN_MAPPING();
   return a < kLowMemEnd;
 }
 
 static inline bool AddrIsInLowShadow(uptr a) {
+  PROFILE_ASAN_MAPPING();
   return a >= kLowShadowBeg && a <= kLowShadowEnd;
 }
 
 static inline bool AddrIsInHighMem(uptr a) {
+  PROFILE_ASAN_MAPPING();
   return a >= kHighMemBeg && a <= kHighMemEnd;
 }
 
+static inline bool AddrIsInMidMem(uptr a) {
+  PROFILE_ASAN_MAPPING();
+  return kMidMemBeg && a >= kMidMemBeg && a <= kMidMemEnd;
+}
+
 static inline bool AddrIsInMem(uptr a) {
-  return AddrIsInLowMem(a) || AddrIsInHighMem(a);
+  PROFILE_ASAN_MAPPING();
+  return AddrIsInLowMem(a) || AddrIsInMidMem(a) || AddrIsInHighMem(a);
 }
 
 static inline uptr MemToShadow(uptr p) {
+  PROFILE_ASAN_MAPPING();
   CHECK(AddrIsInMem(p));
   return MEM_TO_SHADOW(p);
 }
 
 static inline bool AddrIsInHighShadow(uptr a) {
-  return a >= kHighShadowBeg && a <=  kHighMemEnd;
+  PROFILE_ASAN_MAPPING();
+  return a >= kHighShadowBeg && a <= kHighShadowEnd;
+}
+
+static inline bool AddrIsInMidShadow(uptr a) {
+  PROFILE_ASAN_MAPPING();
+  return kMidMemBeg && a >= kMidShadowBeg && a <= kMidShadowEnd;
 }
 
 static inline bool AddrIsInShadow(uptr a) {
-  return AddrIsInLowShadow(a) || AddrIsInHighShadow(a);
+  PROFILE_ASAN_MAPPING();
+  return AddrIsInLowShadow(a) || AddrIsInMidShadow(a) || AddrIsInHighShadow(a);
 }
 
 static inline bool AddrIsInShadowGap(uptr a) {
+  PROFILE_ASAN_MAPPING();
+  if (kMidMemBeg) {
+    if (a <= kShadowGapEnd)
+      return SHADOW_OFFSET == 0 || a >= kShadowGapBeg;
+    return (a >= kShadowGap2Beg && a <= kShadowGap2End) ||
+           (a >= kShadowGap3Beg && a <= kShadowGap3End);
+  }
+  // In zero-based shadow mode we treat addresses near zero as addresses
+  // in shadow gap as well.
+  if (SHADOW_OFFSET == 0)
+    return a <= kShadowGapEnd;
   return a >= kShadowGapBeg && a <= kShadowGapEnd;
 }
 
 static inline bool AddrIsAlignedByGranularity(uptr a) {
+  PROFILE_ASAN_MAPPING();
   return (a & (SHADOW_GRANULARITY - 1)) == 0;
 }
 
 static inline bool AddressIsPoisoned(uptr a) {
+  PROFILE_ASAN_MAPPING();
   const uptr kAccessSize = 1;
-  u8 *shadow_address = (u8*)MemToShadow(a);
+  u8 *shadow_address = (u8*)MEM_TO_SHADOW(a);
   s8 shadow_value = *shadow_address;
   if (shadow_value) {
     u8 last_accessed_byte = (a & (SHADOW_GRANULARITY - 1))
@@ -130,6 +216,9 @@
   return false;
 }
 
+// Must be after all calls to PROFILE_ASAN_MAPPING().
+static const uptr kAsanMappingProfileSize = __LINE__;
+
 }  // namespace __asan
 
 #endif  // ASAN_MAPPING_H
diff --git a/lib/asan/asan_new_delete.cc b/lib/asan/asan_new_delete.cc
index 756810d..40aa31c 100644
--- a/lib/asan/asan_new_delete.cc
+++ b/lib/asan/asan_new_delete.cc
@@ -28,6 +28,7 @@
 using namespace __asan;  // NOLINT
 
 // On Android new() goes through malloc interceptors.
+// See also https://code.google.com/p/address-sanitizer/issues/detail?id=131.
 #if !ASAN_ANDROID
 
 // Fake std::nothrow_t to avoid including <new>.
@@ -35,32 +36,73 @@
 struct nothrow_t {};
 }  // namespace std
 
-#define OPERATOR_NEW_BODY \
-  GET_STACK_TRACE_HERE_FOR_MALLOC;\
-  return asan_memalign(0, size, &stack);
+#define OPERATOR_NEW_BODY(type) \
+  GET_STACK_TRACE_MALLOC;\
+  return asan_memalign(0, size, &stack, type);
 
+// On OS X it's not enough to just provide our own 'operator new' and
+// 'operator delete' implementations, because they're going to be in the
+// runtime dylib, and the main executable will depend on both the runtime
+// dylib and libstdc++, each of which has its own implementation of new and
+// delete.
+// To make sure that C++ allocation/deallocation operators are overridden on
+// OS X we need to intercept them using their mangled names.
+#if !defined(__APPLE__)
 INTERCEPTOR_ATTRIBUTE
-void *operator new(size_t size) { OPERATOR_NEW_BODY; }
+void *operator new(size_t size) { OPERATOR_NEW_BODY(FROM_NEW); }
 INTERCEPTOR_ATTRIBUTE
-void *operator new[](size_t size) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size) { OPERATOR_NEW_BODY(FROM_NEW_BR); }
 INTERCEPTOR_ATTRIBUTE
-void *operator new(size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
+void *operator new(size_t size, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY(FROM_NEW); }
 INTERCEPTOR_ATTRIBUTE
-void *operator new[](size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY(FROM_NEW_BR); }
 
-#define OPERATOR_DELETE_BODY \
-  GET_STACK_TRACE_HERE_FOR_FREE(ptr);\
-  asan_free(ptr, &stack);
+#else  // __APPLE__
+INTERCEPTOR(void *, _Znwm, size_t size) {
+  OPERATOR_NEW_BODY(FROM_NEW);
+}
+INTERCEPTOR(void *, _Znam, size_t size) {
+  OPERATOR_NEW_BODY(FROM_NEW_BR);
+}
+INTERCEPTOR(void *, _ZnwmRKSt9nothrow_t, size_t size, std::nothrow_t const&) {
+  OPERATOR_NEW_BODY(FROM_NEW);
+}
+INTERCEPTOR(void *, _ZnamRKSt9nothrow_t, size_t size, std::nothrow_t const&) {
+  OPERATOR_NEW_BODY(FROM_NEW_BR);
+}
+#endif
 
+#define OPERATOR_DELETE_BODY(type) \
+  GET_STACK_TRACE_FREE;\
+  asan_free(ptr, &stack, type);
+
+#if !defined(__APPLE__)
 INTERCEPTOR_ATTRIBUTE
-void operator delete(void *ptr) { OPERATOR_DELETE_BODY; }
+void operator delete(void *ptr) { OPERATOR_DELETE_BODY(FROM_NEW); }
 INTERCEPTOR_ATTRIBUTE
-void operator delete[](void *ptr) { OPERATOR_DELETE_BODY; }
+void operator delete[](void *ptr) { OPERATOR_DELETE_BODY(FROM_NEW_BR); }
 INTERCEPTOR_ATTRIBUTE
 void operator delete(void *ptr, std::nothrow_t const&)
-{ OPERATOR_DELETE_BODY; }
+{ OPERATOR_DELETE_BODY(FROM_NEW); }
 INTERCEPTOR_ATTRIBUTE
 void operator delete[](void *ptr, std::nothrow_t const&)
-{ OPERATOR_DELETE_BODY; }
+{ OPERATOR_DELETE_BODY(FROM_NEW_BR); }
+
+#else  // __APPLE__
+INTERCEPTOR(void, _ZdlPv, void *ptr) {
+  OPERATOR_DELETE_BODY(FROM_NEW);
+}
+INTERCEPTOR(void, _ZdaPv, void *ptr) {
+  OPERATOR_DELETE_BODY(FROM_NEW_BR);
+}
+INTERCEPTOR(void, _ZdlPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&) {
+  OPERATOR_DELETE_BODY(FROM_NEW);
+}
+INTERCEPTOR(void, _ZdaPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&) {
+  OPERATOR_DELETE_BODY(FROM_NEW_BR);
+}
+#endif
 
 #endif
diff --git a/lib/asan/asan_poisoning.cc b/lib/asan/asan_poisoning.cc
index dcdc7f8..b8b3aec 100644
--- a/lib/asan/asan_poisoning.cc
+++ b/lib/asan/asan_poisoning.cc
@@ -15,15 +15,16 @@
 #include "asan_interceptors.h"
 #include "asan_internal.h"
 #include "asan_mapping.h"
-#include "sanitizer/asan_interface.h"
+#include "sanitizer_common/sanitizer_libc.h"
 
 namespace __asan {
 
 void PoisonShadow(uptr addr, uptr size, u8 value) {
+  if (!flags()->poison_heap) return;
   CHECK(AddrIsAlignedByGranularity(addr));
   CHECK(AddrIsAlignedByGranularity(addr + size));
   uptr shadow_beg = MemToShadow(addr);
-  uptr shadow_end = MemToShadow(addr + size);
+  uptr shadow_end = MemToShadow(addr + size - SHADOW_GRANULARITY) + 1;
   CHECK(REAL(memset) != 0);
   REAL(memset)((void*)shadow_beg, value, shadow_end - shadow_beg);
 }
@@ -32,6 +33,7 @@
                                      uptr size,
                                      uptr redzone_size,
                                      u8 value) {
+  if (!flags()->poison_heap) return;
   CHECK(AddrIsAlignedByGranularity(addr));
   u8 *shadow = (u8*)MemToShadow(addr);
   for (uptr i = 0; i < redzone_size;
@@ -152,6 +154,33 @@
   return __asan::AddressIsPoisoned((uptr)addr);
 }
 
+uptr __asan_region_is_poisoned(uptr beg, uptr size) {
+  if (!size) return 0;
+  uptr end = beg + size;
+  if (!AddrIsInMem(beg)) return beg;
+  if (!AddrIsInMem(end)) return end;
+  uptr aligned_b = RoundUpTo(beg, SHADOW_GRANULARITY);
+  uptr aligned_e = RoundDownTo(end, SHADOW_GRANULARITY);
+  uptr shadow_beg = MemToShadow(aligned_b);
+  uptr shadow_end = MemToShadow(aligned_e);
+  // First check the first and the last application bytes,
+  // then check the SHADOW_GRANULARITY-aligned region by calling
+  // mem_is_zero on the corresponding shadow.
+  if (!__asan::AddressIsPoisoned(beg) &&
+      !__asan::AddressIsPoisoned(end - 1) &&
+      (shadow_end <= shadow_beg ||
+       __sanitizer::mem_is_zero((const char *)shadow_beg,
+                                shadow_end - shadow_beg)))
+    return 0;
+  // The fast check failed, so we have a poisoned byte somewhere.
+  // Find it slowly.
+  for (; beg < end; beg++)
+    if (__asan::AddressIsPoisoned(beg))
+      return beg;
+  UNREACHABLE("mem_is_zero returned false, but poisoned byte was not found");
+  return 0;
+}
+
 // This is a simplified version of __asan_(un)poison_memory_region, which
 // assumes that left border of region to be poisoned is properly aligned.
 static void PoisonAlignedStackMemory(uptr addr, uptr size, bool do_poison) {
@@ -168,7 +197,7 @@
     // If possible, mark all the bytes mapping to last shadow byte as
     // unaddressable.
     if (end_value > 0 && end_value <= end_offset)
-      *shadow_end = kAsanStackUseAfterScopeMagic;
+      *shadow_end = (s8)kAsanStackUseAfterScopeMagic;
   } else {
     // If necessary, mark few first bytes mapping to last shadow byte
     // as addressable
diff --git a/lib/asan/asan_preinit.cc b/lib/asan/asan_preinit.cc
new file mode 100644
index 0000000..07e0a53
--- /dev/null
+++ b/lib/asan/asan_preinit.cc
@@ -0,0 +1,29 @@
+//===-- asan_preinit.cc ---------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Call __asan_init at the very early stage of process startup.
+// On Linux we use .preinit_array section (unless PIC macro is defined).
+//===----------------------------------------------------------------------===//
+#include "asan_internal.h"
+
+#if ASAN_USE_PREINIT_ARRAY && !defined(PIC)
+  // On Linux, we force __asan_init to be called before anyone else
+  // by placing it into .preinit_array section.
+  // FIXME: do we have anything like this on Mac?
+  __attribute__((section(".preinit_array"), used))
+  void (*__asan_preinit)(void) =__asan_init;
+#elif defined(_WIN32) && defined(_DLL)
+  // On Windows, when using dynamic CRT (/MD), we can put a pointer
+  // to __asan_init into the global list of C initializers.
+  // See crt0dat.c in the CRT sources for the details.
+  #pragma section(".CRT$XIB", long, read)  // NOLINT
+  __declspec(allocate(".CRT$XIB")) void (*__asan_preinit)() = __asan_init;
+#endif
diff --git a/lib/asan/asan_report.cc b/lib/asan/asan_report.cc
index 558e4f2..8fa42f7 100644
--- a/lib/asan/asan_report.cc
+++ b/lib/asan/asan_report.cc
@@ -18,6 +18,9 @@
 #include "asan_stack.h"
 #include "asan_thread.h"
 #include "asan_thread_registry.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_report_decorator.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
 
 namespace __asan {
 
@@ -40,31 +43,121 @@
   }
 }
 
+// ---------------------- Decorator ------------------------------ {{{1
+bool PrintsToTtyCached() {
+  static int cached = 0;
+  static bool prints_to_tty;
+  if (!cached) {  // Ok wrt threads since we are printing only from one thread.
+    prints_to_tty = PrintsToTty();
+    cached = 1;
+  }
+  return prints_to_tty;
+}
+class Decorator: private __sanitizer::AnsiColorDecorator {
+ public:
+  Decorator() : __sanitizer::AnsiColorDecorator(PrintsToTtyCached()) { }
+  const char *Warning()    { return Red(); }
+  const char *EndWarning() { return Default(); }
+  const char *Access()     { return Blue(); }
+  const char *EndAccess()  { return Default(); }
+  const char *Location()   { return Green(); }
+  const char *EndLocation() { return Default(); }
+  const char *Allocation()  { return Magenta(); }
+  const char *EndAllocation()  { return Default(); }
+
+  const char *ShadowByte(u8 byte) {
+    switch (byte) {
+      case kAsanHeapLeftRedzoneMagic:
+      case kAsanHeapRightRedzoneMagic:
+        return Red();
+      case kAsanHeapFreeMagic:
+        return Magenta();
+      case kAsanStackLeftRedzoneMagic:
+      case kAsanStackMidRedzoneMagic:
+      case kAsanStackRightRedzoneMagic:
+      case kAsanStackPartialRedzoneMagic:
+        return Red();
+      case kAsanStackAfterReturnMagic:
+        return Magenta();
+      case kAsanInitializationOrderMagic:
+        return Cyan();
+      case kAsanUserPoisonedMemoryMagic:
+        return Blue();
+      case kAsanStackUseAfterScopeMagic:
+        return Magenta();
+      case kAsanGlobalRedzoneMagic:
+        return Red();
+      case kAsanInternalHeapMagic:
+        return Yellow();
+      default:
+        return Default();
+    }
+  }
+  const char *EndShadowByte() { return Default(); }
+};
+
 // ---------------------- Helper functions ----------------------- {{{1
 
-static void PrintBytes(const char *before, uptr *a) {
-  u8 *bytes = (u8*)a;
-  uptr byte_num = (SANITIZER_WORDSIZE) / 8;
-  Printf("%s%p:", before, (void*)a);
-  for (uptr i = 0; i < byte_num; i++) {
-    Printf(" %x%x", bytes[i] >> 4, bytes[i] & 15);
+static void PrintShadowByte(const char *before, u8 byte,
+                            const char *after = "\n") {
+  Decorator d;
+  Printf("%s%s%x%x%s%s", before,
+         d.ShadowByte(byte), byte >> 4, byte & 15, d.EndShadowByte(), after);
+}
+
+static void PrintShadowBytes(const char *before, u8 *bytes,
+                             u8 *guilty, uptr n) {
+  Decorator d;
+  if (before)
+    Printf("%s%p:", before, bytes);
+  for (uptr i = 0; i < n; i++) {
+    u8 *p = bytes + i;
+    const char *before = p == guilty ? "[" :
+        p - 1 == guilty ? "" : " ";
+    const char *after = p == guilty ? "]" : "";
+    PrintShadowByte(before, *p, after);
   }
   Printf("\n");
 }
 
+static void PrintLegend() {
+  Printf("Shadow byte legend (one shadow byte represents %d "
+         "application bytes):\n", (int)SHADOW_GRANULARITY);
+  PrintShadowByte("  Addressable:           ", 0);
+  Printf("  Partially addressable: ");
+  for (uptr i = 1; i < SHADOW_GRANULARITY; i++)
+    PrintShadowByte("", i, " ");
+  Printf("\n");
+  PrintShadowByte("  Heap left redzone:     ", kAsanHeapLeftRedzoneMagic);
+  PrintShadowByte("  Heap right redzone:    ", kAsanHeapRightRedzoneMagic);
+  PrintShadowByte("  Freed Heap region:     ", kAsanHeapFreeMagic);
+  PrintShadowByte("  Stack left redzone:    ", kAsanStackLeftRedzoneMagic);
+  PrintShadowByte("  Stack mid redzone:     ", kAsanStackMidRedzoneMagic);
+  PrintShadowByte("  Stack right redzone:   ", kAsanStackRightRedzoneMagic);
+  PrintShadowByte("  Stack partial redzone: ", kAsanStackPartialRedzoneMagic);
+  PrintShadowByte("  Stack after return:    ", kAsanStackAfterReturnMagic);
+  PrintShadowByte("  Stack use after scope: ", kAsanStackUseAfterScopeMagic);
+  PrintShadowByte("  Global redzone:        ", kAsanGlobalRedzoneMagic);
+  PrintShadowByte("  Global init order:     ", kAsanInitializationOrderMagic);
+  PrintShadowByte("  Poisoned by user:      ", kAsanUserPoisonedMemoryMagic);
+  PrintShadowByte("  ASan internal:         ", kAsanInternalHeapMagic);
+}
+
 static void PrintShadowMemoryForAddress(uptr addr) {
   if (!AddrIsInMem(addr))
     return;
   uptr shadow_addr = MemToShadow(addr);
-  Printf("Shadow byte and word:\n");
-  Printf("  %p: %x\n", (void*)shadow_addr, *(unsigned char*)shadow_addr);
-  uptr aligned_shadow = shadow_addr & ~(kWordSize - 1);
-  PrintBytes("  ", (uptr*)(aligned_shadow));
-  Printf("More shadow bytes:\n");
-  for (int i = -4; i <= 4; i++) {
+  const uptr n_bytes_per_row = 16;
+  uptr aligned_shadow = shadow_addr & ~(n_bytes_per_row - 1);
+  Printf("Shadow bytes around the buggy address:\n");
+  for (int i = -5; i <= 5; i++) {
     const char *prefix = (i == 0) ? "=>" : "  ";
-    PrintBytes(prefix, (uptr*)(aligned_shadow + i * kWordSize));
+    PrintShadowBytes(prefix,
+                     (u8*)(aligned_shadow + i * n_bytes_per_row),
+                     (u8*)shadow_addr, n_bytes_per_row);
   }
+  if (flags()->print_legend)
+    PrintLegend();
 }
 
 static void PrintZoneForPointer(uptr ptr, uptr zone_ptr,
@@ -97,19 +190,27 @@
   Printf("  '%s' is ascii string '%s'\n", g.name, (char*)g.beg);
 }
 
-bool DescribeAddressRelativeToGlobal(uptr addr, const __asan_global &g) {
-  if (addr < g.beg - kGlobalAndStackRedzone) return false;
+bool DescribeAddressRelativeToGlobal(uptr addr, uptr size,
+                                     const __asan_global &g) {
+  static const uptr kMinimalDistanceFromAnotherGlobal = 64;
+  if (addr <= g.beg - kMinimalDistanceFromAnotherGlobal) return false;
   if (addr >= g.beg + g.size_with_redzone) return false;
-  Printf("%p is located ", (void*)addr);
+  Decorator d;
+  Printf("%s", d.Location());
   if (addr < g.beg) {
-    Printf("%zd bytes to the left", g.beg - addr);
-  } else if (addr >= g.beg + g.size) {
-    Printf("%zd bytes to the right", addr - (g.beg + g.size));
+    Printf("%p is located %zd bytes to the left", (void*)addr, g.beg - addr);
+  } else if (addr + size > g.beg + g.size) {
+    if (addr < g.beg + g.size)
+      addr = g.beg + g.size;
+    Printf("%p is located %zd bytes to the right", (void*)addr,
+           addr - (g.beg + g.size));
   } else {
-    Printf("%zd bytes inside", addr - g.beg);  // Can it happen?
+    // Can it happen?
+    Printf("%p is located %zd bytes inside", (void*)addr, addr - g.beg);
   }
   Printf(" of global variable '%s' (0x%zx) of size %zu\n",
              g.name, g.beg, g.size);
+  Printf("%s", d.EndLocation());
   PrintGlobalNameIfASCII(g);
   return true;
 }
@@ -153,9 +254,12 @@
   internal_strncat(buf, frame_descr,
                    Min(kBufSize,
                        static_cast<sptr>(name_end - frame_descr)));
+  Decorator d;
+  Printf("%s", d.Location());
   Printf("Address %p is located at offset %zu "
              "in frame <%s> of T%d's stack:\n",
-             (void*)addr, offset, buf, t->tid());
+             (void*)addr, offset, Demangle(buf), t->tid());
+  Printf("%s", d.EndLocation());
   // Report the number of stack objects.
   char *p;
   uptr n_objects = internal_simple_strtoll(name_end, &p, 10);
@@ -188,19 +292,26 @@
 
 static void DescribeAccessToHeapChunk(AsanChunkView chunk, uptr addr,
                                       uptr access_size) {
-  uptr offset;
-  Printf("%p is located ", (void*)addr);
-  if (chunk.AddrIsInside(addr, access_size, &offset)) {
-    Printf("%zu bytes inside of", offset);
-  } else if (chunk.AddrIsAtLeft(addr, access_size, &offset)) {
-    Printf("%zu bytes to the left of", offset);
+  sptr offset;
+  Decorator d;
+  Printf("%s", d.Location());
+  if (chunk.AddrIsAtLeft(addr, access_size, &offset)) {
+    Printf("%p is located %zd bytes to the left of", (void*)addr, offset);
   } else if (chunk.AddrIsAtRight(addr, access_size, &offset)) {
-    Printf("%zu bytes to the right of", offset);
+    if (offset < 0) {
+      addr -= offset;
+      offset = 0;
+    }
+    Printf("%p is located %zd bytes to the right of", (void*)addr, offset);
+  } else if (chunk.AddrIsInside(addr, access_size, &offset)) {
+    Printf("%p is located %zd bytes inside of", (void*)addr, offset);
   } else {
-    Printf(" somewhere around (this is AddressSanitizer bug!)");
+    Printf("%p is located somewhere around (this is AddressSanitizer bug!)",
+           (void*)addr);
   }
   Printf(" %zu-byte region [%p,%p)\n", chunk.UsedSize(),
          (void*)(chunk.Beg()), (void*)(chunk.End()));
+  Printf("%s", d.EndLocation());
 }
 
 // Return " (thread_name) " or an empty string if the name is empty.
@@ -234,24 +345,30 @@
   AsanThread *t = asanThreadRegistry().GetCurrent();
   CHECK(t);
   char tname[128];
+  Decorator d;
   if (chunk.FreeTid() != kInvalidTid) {
     AsanThreadSummary *free_thread =
         asanThreadRegistry().FindByTid(chunk.FreeTid());
-    Printf("freed by thread T%d%s here:\n", free_thread->tid(),
-           ThreadNameWithParenthesis(free_thread, tname, sizeof(tname)));
+    Printf("%sfreed by thread T%d%s here:%s\n", d.Allocation(),
+           free_thread->tid(),
+           ThreadNameWithParenthesis(free_thread, tname, sizeof(tname)),
+           d.EndAllocation());
     StackTrace free_stack;
     chunk.GetFreeStack(&free_stack);
     PrintStack(&free_stack);
-    Printf("previously allocated by thread T%d%s here:\n",
-           alloc_thread->tid(),
-           ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)));
+    Printf("%spreviously allocated by thread T%d%s here:%s\n",
+           d.Allocation(), alloc_thread->tid(),
+           ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
+           d.EndAllocation());
     PrintStack(&alloc_stack);
     DescribeThread(t->summary());
     DescribeThread(free_thread);
     DescribeThread(alloc_thread);
   } else {
-    Printf("allocated by thread T%d%s here:\n", alloc_thread->tid(),
-           ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)));
+    Printf("%sallocated by thread T%d%s here:%s\n", d.Allocation(),
+           alloc_thread->tid(),
+           ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
+           d.EndAllocation());
     PrintStack(&alloc_stack);
     DescribeThread(t->summary());
     DescribeThread(alloc_thread);
@@ -263,7 +380,7 @@
   if (DescribeAddressIfShadow(addr))
     return;
   CHECK(AddrIsInMem(addr));
-  if (DescribeAddressIfGlobal(addr))
+  if (DescribeAddressIfGlobal(addr, access_size))
     return;
   if (DescribeAddressIfStack(addr, access_size))
     return;
@@ -318,9 +435,9 @@
         // an error report will finish doing it.
         SleepForSeconds(Max(100, flags()->sleep_before_dying + 1));
       }
-      // If we're still not dead for some reason, use raw Exit() instead of
+      // If we're still not dead for some reason, use raw _exit() instead of
       // Die() to bypass any additional checks.
-      Exit(flags()->exitcode);
+      internal__exit(flags()->exitcode);
     }
     ASAN_ON_ERROR();
     reporting_thread_tid = asanThreadRegistry().GetCurrentTidOrInvalid();
@@ -342,7 +459,8 @@
       DescribeThread(curr_thread->summary());
     }
     // Print memory stats.
-    __asan_print_accumulated_stats();
+    if (flags()->print_stats)
+      __asan_print_accumulated_stats();
     if (error_report_callback) {
       error_report_callback(error_message_buffer);
     }
@@ -351,60 +469,121 @@
   }
 };
 
+static void ReportSummary(const char *error_type, StackTrace *stack) {
+  if (!stack->size) return;
+  if (IsSymbolizerAvailable()) {
+    AddressInfo ai;
+    // Currently, we include the first stack frame into the report summary.
+    // Maybe sometimes we need to choose another frame (e.g. skip memcpy/etc).
+    SymbolizeCode(stack->trace[0], &ai, 1);
+    ReportErrorSummary(error_type,
+                       StripPathPrefix(ai.file, flags()->strip_path_prefix),
+                       ai.line, ai.function);
+  }
+  // FIXME: do we need to print anything at all if there is no symbolizer?
+}
+
 void ReportSIGSEGV(uptr pc, uptr sp, uptr bp, uptr addr) {
   ScopedInErrorReport in_report;
+  Decorator d;
+  Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: SEGV on unknown address %p"
              " (pc %p sp %p bp %p T%d)\n",
              (void*)addr, (void*)pc, (void*)sp, (void*)bp,
              asanThreadRegistry().GetCurrentTidOrInvalid());
+  Printf("%s", d.EndWarning());
   Printf("AddressSanitizer can not provide additional info.\n");
-  GET_STACK_TRACE_WITH_PC_AND_BP(kStackTraceMax, pc, bp);
+  GET_STACK_TRACE_FATAL(pc, bp);
   PrintStack(&stack);
+  ReportSummary("SEGV", &stack);
 }
 
 void ReportDoubleFree(uptr addr, StackTrace *stack) {
   ScopedInErrorReport in_report;
+  Decorator d;
+  Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: attempting double-free on %p:\n", addr);
+  Printf("%s", d.EndWarning());
   PrintStack(stack);
   DescribeHeapAddress(addr, 1);
+  ReportSummary("double-free", stack);
 }
 
 void ReportFreeNotMalloced(uptr addr, StackTrace *stack) {
   ScopedInErrorReport in_report;
+  Decorator d;
+  Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: attempting free on address "
              "which was not malloc()-ed: %p\n", addr);
+  Printf("%s", d.EndWarning());
   PrintStack(stack);
   DescribeHeapAddress(addr, 1);
+  ReportSummary("bad-free", stack);
+}
+
+void ReportAllocTypeMismatch(uptr addr, StackTrace *stack,
+                             AllocType alloc_type,
+                             AllocType dealloc_type) {
+  static const char *alloc_names[] =
+    {"INVALID", "malloc", "operator new", "operator new []"};
+  static const char *dealloc_names[] =
+    {"INVALID", "free", "operator delete", "operator delete []"};
+  CHECK_NE(alloc_type, dealloc_type);
+  ScopedInErrorReport in_report;
+  Decorator d;
+  Printf("%s", d.Warning());
+  Report("ERROR: AddressSanitizer: alloc-dealloc-mismatch (%s vs %s) on %p\n",
+        alloc_names[alloc_type], dealloc_names[dealloc_type], addr);
+  Printf("%s", d.EndWarning());
+  PrintStack(stack);
+  DescribeHeapAddress(addr, 1);
+  ReportSummary("alloc-dealloc-mismatch", stack);
+  Report("HINT: if you don't care about these warnings you may set "
+         "ASAN_OPTIONS=alloc_dealloc_mismatch=0\n");
 }
 
 void ReportMallocUsableSizeNotOwned(uptr addr, StackTrace *stack) {
   ScopedInErrorReport in_report;
+  Decorator d;
+  Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: attempting to call "
              "malloc_usable_size() for pointer which is "
              "not owned: %p\n", addr);
+  Printf("%s", d.EndWarning());
   PrintStack(stack);
   DescribeHeapAddress(addr, 1);
+  ReportSummary("bad-malloc_usable_size", stack);
 }
 
 void ReportAsanGetAllocatedSizeNotOwned(uptr addr, StackTrace *stack) {
   ScopedInErrorReport in_report;
+  Decorator d;
+  Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: attempting to call "
              "__asan_get_allocated_size() for pointer which is "
              "not owned: %p\n", addr);
+  Printf("%s", d.EndWarning());
   PrintStack(stack);
   DescribeHeapAddress(addr, 1);
+  ReportSummary("bad-__asan_get_allocated_size", stack);
 }
 
 void ReportStringFunctionMemoryRangesOverlap(
     const char *function, const char *offset1, uptr length1,
     const char *offset2, uptr length2, StackTrace *stack) {
   ScopedInErrorReport in_report;
-  Report("ERROR: AddressSanitizer: %s-param-overlap: "
+  Decorator d;
+  char bug_type[100];
+  internal_snprintf(bug_type, sizeof(bug_type), "%s-param-overlap", function);
+  Printf("%s", d.Warning());
+  Report("ERROR: AddressSanitizer: %s: "
              "memory ranges [%p,%p) and [%p, %p) overlap\n", \
-             function, offset1, offset1 + length1, offset2, offset2 + length2);
+             bug_type, offset1, offset1 + length1, offset2, offset2 + length2);
+  Printf("%s", d.EndWarning());
   PrintStack(stack);
   DescribeAddress((uptr)offset1, length1);
   DescribeAddress((uptr)offset2, length2);
+  ReportSummary(bug_type, stack);
 }
 
 // ----------------------- Mac-specific reports ----------------- {{{1
@@ -494,23 +673,27 @@
         break;
     }
   }
-
+  Decorator d;
+  Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: %s on address "
              "%p at pc 0x%zx bp 0x%zx sp 0x%zx\n",
              bug_descr, (void*)addr, pc, bp, sp);
+  Printf("%s", d.EndWarning());
 
   u32 curr_tid = asanThreadRegistry().GetCurrentTidOrInvalid();
   char tname[128];
-  Printf("%s of size %zu at %p thread T%d%s\n",
-             access_size ? (is_write ? "WRITE" : "READ") : "ACCESS",
-             access_size, (void*)addr, curr_tid,
-             ThreadNameWithParenthesis(curr_tid, tname, sizeof(tname)));
+  Printf("%s%s of size %zu at %p thread T%d%s%s\n",
+         d.Access(),
+         access_size ? (is_write ? "WRITE" : "READ") : "ACCESS",
+         access_size, (void*)addr, curr_tid,
+         ThreadNameWithParenthesis(curr_tid, tname, sizeof(tname)),
+         d.EndAccess());
 
-  GET_STACK_TRACE_WITH_PC_AND_BP(kStackTraceMax, pc, bp);
+  GET_STACK_TRACE_FATAL(pc, bp);
   PrintStack(&stack);
 
   DescribeAddress(addr, access_size);
-
+  ReportSummary(bug_descr, &stack);
   PrintShadowMemoryForAddress(addr);
 }
 
@@ -524,6 +707,10 @@
   }
 }
 
+void __asan_describe_address(uptr addr) {
+  DescribeAddress(addr, 1);
+}
+
 #if !SANITIZER_SUPPORTS_WEAK_HOOKS
 // Provide default implementation of __asan_on_error that does nothing
 // and may be overriden by user.
diff --git a/lib/asan/asan_report.h b/lib/asan/asan_report.h
index dc3bf9b..55a8039 100644
--- a/lib/asan/asan_report.h
+++ b/lib/asan/asan_report.h
@@ -12,17 +12,18 @@
 // ASan-private header for error reporting functions.
 //===----------------------------------------------------------------------===//
 
+#include "asan_allocator.h"
 #include "asan_internal.h"
 #include "asan_thread.h"
-#include "sanitizer/asan_interface.h"
 
 namespace __asan {
 
 // The following functions prints address description depending
 // on the memory type (shadow/heap/stack/global).
 void DescribeHeapAddress(uptr addr, uptr access_size);
-bool DescribeAddressIfGlobal(uptr addr);
-bool DescribeAddressRelativeToGlobal(uptr addr, const __asan_global &g);
+bool DescribeAddressIfGlobal(uptr addr, uptr access_size);
+bool DescribeAddressRelativeToGlobal(uptr addr, uptr access_size,
+                                     const __asan_global &g);
 bool DescribeAddressIfShadow(uptr addr);
 bool DescribeAddressIfStack(uptr addr, uptr access_size);
 // Determines memory type on its own.
@@ -34,6 +35,9 @@
 void NORETURN ReportSIGSEGV(uptr pc, uptr sp, uptr bp, uptr addr);
 void NORETURN ReportDoubleFree(uptr addr, StackTrace *stack);
 void NORETURN ReportFreeNotMalloced(uptr addr, StackTrace *stack);
+void NORETURN ReportAllocTypeMismatch(uptr addr, StackTrace *stack,
+                                      AllocType alloc_type,
+                                      AllocType dealloc_type);
 void NORETURN ReportMallocUsableSizeNotOwned(uptr addr,
                                              StackTrace *stack);
 void NORETURN ReportAsanGetAllocatedSizeNotOwned(uptr addr,
diff --git a/lib/asan/asan_rtl.cc b/lib/asan/asan_rtl.cc
index 37c9583..d7aebd4 100644
--- a/lib/asan/asan_rtl.cc
+++ b/lib/asan/asan_rtl.cc
@@ -14,14 +14,12 @@
 #include "asan_allocator.h"
 #include "asan_interceptors.h"
 #include "asan_internal.h"
-#include "asan_lock.h"
 #include "asan_mapping.h"
 #include "asan_report.h"
 #include "asan_stack.h"
 #include "asan_stats.h"
 #include "asan_thread.h"
 #include "asan_thread_registry.h"
-#include "sanitizer/asan_interface.h"
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_libc.h"
@@ -29,6 +27,8 @@
 
 namespace __asan {
 
+uptr AsanMappingProfile[kAsanMappingProfileSize];
+
 static void AsanDie() {
   static atomic_uint32_t num_calls;
   if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) != 0) {
@@ -39,13 +39,19 @@
     Report("Sleeping for %d second(s)\n", flags()->sleep_before_dying);
     SleepForSeconds(flags()->sleep_before_dying);
   }
-  if (flags()->unmap_shadow_on_exit)
-    UnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
+  if (flags()->unmap_shadow_on_exit) {
+    if (kMidMemBeg) {
+      UnmapOrDie((void*)kLowShadowBeg, kMidMemBeg - kLowShadowBeg);
+      UnmapOrDie((void*)kMidMemEnd, kHighShadowEnd - kMidMemEnd);
+    } else {
+      UnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
+    }
+  }
   if (death_callback)
     death_callback();
   if (flags()->abort_on_error)
     Abort();
-  Exit(flags()->exitcode);
+  internal__exit(flags()->exitcode);
 }
 
 static void AsanCheckFailed(const char *file, int line, const char *cond,
@@ -54,7 +60,7 @@
              file, line, cond, (uptr)v1, (uptr)v2);
   // FIXME: check for infinite recursion without a thread-local counter here.
   PRINT_CURRENT_STACK();
-  ShowStatsAndAbort();
+  Die();
 }
 
 // -------------------------- Flags ------------------------- {{{1
@@ -70,6 +76,17 @@
   return (&__asan_default_options) ? __asan_default_options() : "";
 }
 
+static const char *MaybeUseAsanDefaultOptionsCompileDefiniton() {
+#ifdef ASAN_DEFAULT_OPTIONS
+// Stringize the macro value.
+# define ASAN_STRINGIZE(x) #x
+# define ASAN_STRINGIZE_OPTIONS(options) ASAN_STRINGIZE(options)
+  return ASAN_STRINGIZE_OPTIONS(ASAN_DEFAULT_OPTIONS);
+#else
+  return "";
+#endif
+}
+
 static void ParseFlagsFromString(Flags *f, const char *str) {
   ParseFlag(str, &f->quarantine_size, "quarantine_size");
   ParseFlag(str, &f->symbolize, "symbolize");
@@ -86,7 +103,6 @@
 
   ParseFlag(str, &f->replace_str, "replace_str");
   ParseFlag(str, &f->replace_intrin, "replace_intrin");
-  ParseFlag(str, &f->replace_cfallocator, "replace_cfallocator");
   ParseFlag(str, &f->mac_ignore_invalid_free, "mac_ignore_invalid_free");
   ParseFlag(str, &f->use_fake_stack, "use_fake_stack");
   ParseFlag(str, &f->max_malloc_fill_size, "max_malloc_fill_size");
@@ -98,12 +114,20 @@
   ParseFlag(str, &f->check_malloc_usable_size, "check_malloc_usable_size");
   ParseFlag(str, &f->unmap_shadow_on_exit, "unmap_shadow_on_exit");
   ParseFlag(str, &f->abort_on_error, "abort_on_error");
+  ParseFlag(str, &f->print_stats, "print_stats");
+  ParseFlag(str, &f->print_legend, "print_legend");
   ParseFlag(str, &f->atexit, "atexit");
   ParseFlag(str, &f->disable_core, "disable_core");
   ParseFlag(str, &f->strip_path_prefix, "strip_path_prefix");
   ParseFlag(str, &f->allow_reexec, "allow_reexec");
   ParseFlag(str, &f->print_full_thread_history, "print_full_thread_history");
   ParseFlag(str, &f->log_path, "log_path");
+  ParseFlag(str, &f->fast_unwind_on_fatal, "fast_unwind_on_fatal");
+  ParseFlag(str, &f->fast_unwind_on_malloc, "fast_unwind_on_malloc");
+  ParseFlag(str, &f->poison_heap, "poison_heap");
+  ParseFlag(str, &f->alloc_dealloc_mismatch, "alloc_dealloc_mismatch");
+  ParseFlag(str, &f->use_stack_depot, "use_stack_depot");
+  ParseFlag(str, &f->strict_memcmp, "strict_memcmp");
 }
 
 void InitializeFlags(Flags *f, const char *env) {
@@ -112,14 +136,13 @@
   f->quarantine_size = (ASAN_LOW_MEMORY) ? 1UL << 26 : 1UL << 28;
   f->symbolize = false;
   f->verbosity = 0;
-  f->redzone = (ASAN_LOW_MEMORY) ? 64 : 128;
+  f->redzone = ASAN_ALLOCATOR_VERSION == 2 ? 16 : (ASAN_LOW_MEMORY) ? 64 : 128;
   f->debug = false;
   f->report_globals = 1;
   f->check_initialization_order = true;
   f->malloc_context_size = kDeafultMallocContextSize;
   f->replace_str = true;
   f->replace_intrin = true;
-  f->replace_cfallocator = true;
   f->mac_ignore_invalid_free = false;
   f->use_fake_stack = true;
   f->max_malloc_fill_size = 0;
@@ -131,12 +154,25 @@
   f->check_malloc_usable_size = true;
   f->unmap_shadow_on_exit = false;
   f->abort_on_error = false;
+  f->print_stats = false;
+  f->print_legend = true;
   f->atexit = false;
   f->disable_core = (SANITIZER_WORDSIZE == 64);
   f->strip_path_prefix = "";
   f->allow_reexec = true;
   f->print_full_thread_history = true;
   f->log_path = 0;
+  f->fast_unwind_on_fatal = false;
+  f->fast_unwind_on_malloc = true;
+  f->poison_heap = true;
+  // Turn off alloc/dealloc mismatch checker on Mac for now.
+  // TODO(glider): Fix known issues and enable this back.
+  f->alloc_dealloc_mismatch = (ASAN_MAC == 0);
+  f->use_stack_depot = true;  // Only affects allocator2.
+  f->strict_memcmp = true;
+
+  // Override from compile definition.
+  ParseFlagsFromString(f, MaybeUseAsanDefaultOptionsCompileDefiniton());
 
   // Override from user-specified string.
   ParseFlagsFromString(f, MaybeCallAsanDefaultOptions());
@@ -154,6 +190,10 @@
 bool asan_init_is_running;
 void (*death_callback)(void);
 
+#if !ASAN_FIXED_MAPPING
+uptr kHighMemEnd, kMidMemBeg, kMidMemEnd;
+#endif
+
 // -------------------------- Misc ---------------- {{{1
 void ShowStatsAndAbort() {
   __asan_print_accumulated_stats();
@@ -200,6 +240,17 @@
 ASAN_REPORT_ERROR(store, true, 8)
 ASAN_REPORT_ERROR(store, true, 16)
 
+#define ASAN_REPORT_ERROR_N(type, is_write)                    \
+extern "C" NOINLINE INTERFACE_ATTRIBUTE                        \
+void __asan_report_ ## type ## _n(uptr addr, uptr size);       \
+void __asan_report_ ## type ## _n(uptr addr, uptr size) {      \
+  GET_CALLER_PC_BP_SP;                                         \
+  __asan_report_error(pc, bp, sp, addr, is_write, size);       \
+}
+
+ASAN_REPORT_ERROR_N(load, false)
+ASAN_REPORT_ERROR_N(store, true)
+
 // Force the linker to keep the symbols for various ASan interface functions.
 // We want to keep those in the executable in order to let the instrumented
 // dynamic libraries access the symbol even if it is not used by the executable
@@ -220,7 +271,6 @@
     case 8: __asan_report_store4(0); break;
     case 9: __asan_report_store8(0); break;
     case 10: __asan_report_store16(0); break;
-    case 11: __asan_register_global(0, 0, 0); break;
     case 12: __asan_register_globals(0, 0); break;
     case 13: __asan_unregister_globals(0, 0); break;
     case 14: __asan_set_death_callback(0); break;
@@ -243,12 +293,90 @@
     case 31: __asan_after_dynamic_init(); break;
     case 32: __asan_poison_stack_memory(0, 0); break;
     case 33: __asan_unpoison_stack_memory(0, 0); break;
+    case 34: __asan_region_is_poisoned(0, 0); break;
+    case 35: __asan_describe_address(0); break;
   }
 }
 
 static void asan_atexit() {
   Printf("AddressSanitizer exit stats:\n");
   __asan_print_accumulated_stats();
+  // Print AsanMappingProfile.
+  for (uptr i = 0; i < kAsanMappingProfileSize; i++) {
+    if (AsanMappingProfile[i] == 0) continue;
+    Printf("asan_mapping.h:%zd -- %zd\n", i, AsanMappingProfile[i]);
+  }
+}
+
+static void InitializeHighMemEnd() {
+#if !ASAN_FIXED_MAPPING
+#if SANITIZER_WORDSIZE == 64
+# if defined(__powerpc64__)
+  // FIXME:
+  // On PowerPC64 we have two different address space layouts: 44- and 46-bit.
+  // We somehow need to figure out which one we are using now and choose
+  // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL.
+  // Note that with 'ulimit -s unlimited' the stack is moved away from the top
+  // of the address space, so simply checking the stack address is not enough.
+  kHighMemEnd = (1ULL << 44) - 1;  // 0x00000fffffffffffUL
+# else
+  kHighMemEnd = (1ULL << 47) - 1;  // 0x00007fffffffffffUL;
+# endif
+#else  // SANITIZER_WORDSIZE == 32
+  kHighMemEnd = (1ULL << 32) - 1;  // 0xffffffff;
+#endif  // SANITIZER_WORDSIZE
+#endif  // !ASAN_FIXED_MAPPING
+}
+
+static void ProtectGap(uptr a, uptr size) {
+  CHECK_EQ(a, (uptr)Mprotect(a, size));
+}
+
+static void PrintAddressSpaceLayout() {
+  Printf("|| `[%p, %p]` || HighMem    ||\n",
+         (void*)kHighMemBeg, (void*)kHighMemEnd);
+  Printf("|| `[%p, %p]` || HighShadow ||\n",
+         (void*)kHighShadowBeg, (void*)kHighShadowEnd);
+  if (kMidMemBeg) {
+    Printf("|| `[%p, %p]` || ShadowGap3 ||\n",
+           (void*)kShadowGap3Beg, (void*)kShadowGap3End);
+    Printf("|| `[%p, %p]` || MidMem     ||\n",
+           (void*)kMidMemBeg, (void*)kMidMemEnd);
+    Printf("|| `[%p, %p]` || ShadowGap2 ||\n",
+           (void*)kShadowGap2Beg, (void*)kShadowGap2End);
+    Printf("|| `[%p, %p]` || MidShadow  ||\n",
+           (void*)kMidShadowBeg, (void*)kMidShadowEnd);
+  }
+  Printf("|| `[%p, %p]` || ShadowGap  ||\n",
+         (void*)kShadowGapBeg, (void*)kShadowGapEnd);
+  if (kLowShadowBeg) {
+    Printf("|| `[%p, %p]` || LowShadow  ||\n",
+           (void*)kLowShadowBeg, (void*)kLowShadowEnd);
+    Printf("|| `[%p, %p]` || LowMem     ||\n",
+           (void*)kLowMemBeg, (void*)kLowMemEnd);
+  }
+  Printf("MemToShadow(shadow): %p %p %p %p",
+         (void*)MEM_TO_SHADOW(kLowShadowBeg),
+         (void*)MEM_TO_SHADOW(kLowShadowEnd),
+         (void*)MEM_TO_SHADOW(kHighShadowBeg),
+         (void*)MEM_TO_SHADOW(kHighShadowEnd));
+  if (kMidMemBeg) {
+    Printf(" %p %p",
+           (void*)MEM_TO_SHADOW(kMidShadowBeg),
+           (void*)MEM_TO_SHADOW(kMidShadowEnd));
+  }
+  Printf("\n");
+  Printf("red_zone=%zu\n", (uptr)flags()->redzone);
+  Printf("malloc_context_size=%zu\n", (uptr)flags()->malloc_context_size);
+
+  Printf("SHADOW_SCALE: %zx\n", (uptr)SHADOW_SCALE);
+  Printf("SHADOW_GRANULARITY: %zx\n", (uptr)SHADOW_GRANULARITY);
+  Printf("SHADOW_OFFSET: %zx\n", (uptr)SHADOW_OFFSET);
+  CHECK(SHADOW_SCALE >= 3 && SHADOW_SCALE <= 7);
+  if (kMidMemBeg)
+    CHECK(kMidShadowBeg > kLowShadowEnd &&
+          kMidMemBeg > kMidShadowEnd &&
+          kHighShadowBeg > kMidMemEnd);
 }
 
 }  // namespace __asan
@@ -285,8 +413,10 @@
 
 void __asan_init() {
   if (asan_inited) return;
+  SanitizerToolName = "AddressSanitizer";
   CHECK(!asan_init_is_running && "ASan init calls itself!");
   asan_init_is_running = true;
+  InitializeHighMemEnd();
 
   // Make sure we are not statically linked.
   AsanDoesNotSupportStaticLinkage();
@@ -322,49 +452,48 @@
   ReplaceSystemMalloc();
   ReplaceOperatorsNewAndDelete();
 
-  if (flags()->verbosity) {
-    Printf("|| `[%p, %p]` || HighMem    ||\n",
-           (void*)kHighMemBeg, (void*)kHighMemEnd);
-    Printf("|| `[%p, %p]` || HighShadow ||\n",
-           (void*)kHighShadowBeg, (void*)kHighShadowEnd);
-    Printf("|| `[%p, %p]` || ShadowGap  ||\n",
-           (void*)kShadowGapBeg, (void*)kShadowGapEnd);
-    Printf("|| `[%p, %p]` || LowShadow  ||\n",
-           (void*)kLowShadowBeg, (void*)kLowShadowEnd);
-    Printf("|| `[%p, %p]` || LowMem     ||\n",
-           (void*)kLowMemBeg, (void*)kLowMemEnd);
-    Printf("MemToShadow(shadow): %p %p %p %p\n",
-           (void*)MEM_TO_SHADOW(kLowShadowBeg),
-           (void*)MEM_TO_SHADOW(kLowShadowEnd),
-           (void*)MEM_TO_SHADOW(kHighShadowBeg),
-           (void*)MEM_TO_SHADOW(kHighShadowEnd));
-    Printf("red_zone=%zu\n", (uptr)flags()->redzone);
-    Printf("malloc_context_size=%zu\n", (uptr)flags()->malloc_context_size);
+  uptr shadow_start = kLowShadowBeg;
+  if (kLowShadowBeg) shadow_start -= GetMmapGranularity();
+  uptr shadow_end = kHighShadowEnd;
+  bool full_shadow_is_available =
+      MemoryRangeIsAvailable(shadow_start, shadow_end);
 
-    Printf("SHADOW_SCALE: %zx\n", (uptr)SHADOW_SCALE);
-    Printf("SHADOW_GRANULARITY: %zx\n", (uptr)SHADOW_GRANULARITY);
-    Printf("SHADOW_OFFSET: %zx\n", (uptr)SHADOW_OFFSET);
-    CHECK(SHADOW_SCALE >= 3 && SHADOW_SCALE <= 7);
+#if ASAN_LINUX && defined(__x86_64__) && !ASAN_FIXED_MAPPING
+  if (!full_shadow_is_available) {
+    kMidMemBeg = kLowMemEnd < 0x3000000000ULL ? 0x3000000000ULL : 0;
+    kMidMemEnd = kLowMemEnd < 0x3000000000ULL ? 0x4fffffffffULL : 0;
   }
+#endif
+
+  if (flags()->verbosity)
+    PrintAddressSpaceLayout();
 
   if (flags()->disable_core) {
     DisableCoreDumper();
   }
 
-  uptr shadow_start = kLowShadowBeg;
-  if (kLowShadowBeg > 0) shadow_start -= GetMmapGranularity();
-  uptr shadow_end = kHighShadowEnd;
-  if (MemoryRangeIsAvailable(shadow_start, shadow_end)) {
-    if (kLowShadowBeg != kLowShadowEnd) {
-      // mmap the low shadow plus at least one page.
-      ReserveShadowMemoryRange(kLowShadowBeg - GetMmapGranularity(),
-                               kLowShadowEnd);
-    }
+  if (full_shadow_is_available) {
+    // mmap the low shadow plus at least one page at the left.
+    if (kLowShadowBeg)
+      ReserveShadowMemoryRange(shadow_start, kLowShadowEnd);
     // mmap the high shadow.
     ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd);
-    // protect the gap
-    void *prot = Mprotect(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1);
-    CHECK(prot == (void*)kShadowGapBeg);
+    // protect the gap.
+    ProtectGap(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1);
+  } else if (kMidMemBeg &&
+      MemoryRangeIsAvailable(shadow_start, kMidMemBeg - 1) &&
+      MemoryRangeIsAvailable(kMidMemEnd + 1, shadow_end)) {
+    CHECK(kLowShadowBeg != kLowShadowEnd);
+    // mmap the low shadow plus at least one page at the left.
+    ReserveShadowMemoryRange(shadow_start, kLowShadowEnd);
+    // mmap the mid shadow.
+    ReserveShadowMemoryRange(kMidShadowBeg, kMidShadowEnd);
+    // mmap the high shadow.
+    ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd);
+    // protect the gaps.
+    ProtectGap(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1);
+    ProtectGap(kShadowGap2Beg, kShadowGap2End - kShadowGap2Beg + 1);
+    ProtectGap(kShadowGap3Beg, kShadowGap3End - kShadowGap3Beg + 1);
   } else {
     Report("Shadow memory range interleaves with an existing memory mapping. "
            "ASan cannot proceed correctly. ABORTING.\n");
@@ -390,21 +519,9 @@
   asanThreadRegistry().GetMain()->ThreadStart();
   force_interface_symbols();  // no-op.
 
+  InitializeAllocator();
+
   if (flags()->verbosity) {
     Report("AddressSanitizer Init done\n");
   }
 }
-
-#if defined(ASAN_USE_PREINIT_ARRAY)
-  // On Linux, we force __asan_init to be called before anyone else
-  // by placing it into .preinit_array section.
-  // FIXME: do we have anything like this on Mac?
-  __attribute__((section(".preinit_array")))
-    typeof(__asan_init) *__asan_preinit =__asan_init;
-#elif defined(_WIN32) && defined(_DLL)
-  // On Windows, when using dynamic CRT (/MD), we can put a pointer
-  // to __asan_init into the global list of C initializers.
-  // See crt0dat.c in the CRT sources for the details.
-  #pragma section(".CRT$XIB", long, read)  // NOLINT
-  __declspec(allocate(".CRT$XIB")) void (*__asan_preinit)() = __asan_init;
-#endif
diff --git a/lib/asan/asan_stack.cc b/lib/asan/asan_stack.cc
index ebf22fd..a50ab1d 100644
--- a/lib/asan/asan_stack.cc
+++ b/lib/asan/asan_stack.cc
@@ -11,9 +11,9 @@
 //
 // Code for ASan stack trace.
 //===----------------------------------------------------------------------===//
+#include "asan_internal.h"
 #include "asan_flags.h"
 #include "asan_stack.h"
-#include "sanitizer/asan_interface.h"
 
 namespace __asan {
 
diff --git a/lib/asan/asan_stack.h b/lib/asan/asan_stack.h
index 99f2cac..46c9f34 100644
--- a/lib/asan/asan_stack.h
+++ b/lib/asan/asan_stack.h
@@ -15,10 +15,11 @@
 #define ASAN_STACK_H
 
 #include "sanitizer_common/sanitizer_stacktrace.h"
+#include "asan_flags.h"
 
 namespace __asan {
 
-void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp);
+void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp, bool fast);
 void PrintStack(StackTrace *stack);
 
 }  // namespace __asan
@@ -27,27 +28,38 @@
 // The pc will be in the position 0 of the resulting stack trace.
 // The bp may refer to the current frame or to the caller's frame.
 // fast_unwind is currently unused.
-#define GET_STACK_TRACE_WITH_PC_AND_BP(max_s, pc, bp)               \
+#define GET_STACK_TRACE_WITH_PC_AND_BP(max_s, pc, bp, fast)     \
   StackTrace stack;                                             \
-  GetStackTrace(&stack, max_s, pc, bp)
+  GetStackTrace(&stack, max_s, pc, bp, fast)
 
 // NOTE: A Rule of thumb is to retrieve stack trace in the interceptors
 // as early as possible (in functions exposed to the user), as we generally
 // don't want stack trace to contain functions from ASan internals.
 
-#define GET_STACK_TRACE_HERE(max_size)                        \
+#define GET_STACK_TRACE(max_size, fast)                       \
   GET_STACK_TRACE_WITH_PC_AND_BP(max_size,                    \
-      StackTrace::GetCurrentPc(), GET_CURRENT_FRAME())
+      StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(), fast)
 
-#define GET_STACK_TRACE_HERE_FOR_MALLOC                             \
-  GET_STACK_TRACE_HERE(flags()->malloc_context_size)
+#define GET_STACK_TRACE_FATAL(pc, bp)                                 \
+  GET_STACK_TRACE_WITH_PC_AND_BP(kStackTraceMax, pc, bp,              \
+                                 flags()->fast_unwind_on_fatal)
 
-#define GET_STACK_TRACE_HERE_FOR_FREE(ptr)                          \
-  GET_STACK_TRACE_HERE(flags()->malloc_context_size)
+#define GET_STACK_TRACE_FATAL_HERE                           \
+  GET_STACK_TRACE(kStackTraceMax, flags()->fast_unwind_on_fatal)
+
+#define GET_STACK_TRACE_THREAD                              \
+  GET_STACK_TRACE(kStackTraceMax, true)
+
+#define GET_STACK_TRACE_MALLOC                             \
+  GET_STACK_TRACE(flags()->malloc_context_size,            \
+                  flags()->fast_unwind_on_malloc)
+
+#define GET_STACK_TRACE_FREE GET_STACK_TRACE_MALLOC
 
 #define PRINT_CURRENT_STACK()                    \
   {                                              \
-    GET_STACK_TRACE_HERE(kStackTraceMax);        \
+    GET_STACK_TRACE(kStackTraceMax,              \
+      flags()->fast_unwind_on_fatal);            \
     PrintStack(&stack);                          \
   }
 
diff --git a/lib/asan/asan_stats.cc b/lib/asan/asan_stats.cc
index cf8cadf..ba67c82 100644
--- a/lib/asan/asan_stats.cc
+++ b/lib/asan/asan_stats.cc
@@ -13,10 +13,9 @@
 //===----------------------------------------------------------------------===//
 #include "asan_interceptors.h"
 #include "asan_internal.h"
-#include "asan_lock.h"
 #include "asan_stats.h"
 #include "asan_thread_registry.h"
-#include "sanitizer/asan_interface.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
 
 namespace __asan {
 
@@ -42,8 +41,9 @@
   Printf("Stats: %zuM freed by %zu calls\n", freed>>20, frees);
   Printf("Stats: %zuM really freed by %zu calls\n",
              really_freed>>20, real_frees);
-  Printf("Stats: %zuM (%zu full pages) mmaped in %zu calls\n",
-             mmaped>>20, mmaped / GetPageSizeCached(), mmaps);
+  Printf("Stats: %zuM (%zuM-%zuM) mmaped; %zu maps, %zu unmaps\n",
+             (mmaped-munmaped)>>20, mmaped>>20, munmaped>>20,
+             mmaps, munmaps);
 
   PrintMallocStatsArray("  mmaps   by size class: ", mmaped_by_size);
   PrintMallocStatsArray("  mallocs by size class: ", malloced_by_size);
@@ -53,14 +53,18 @@
              malloc_large, malloc_small_slow);
 }
 
-static AsanLock print_lock(LINKER_INITIALIZED);
+static BlockingMutex print_lock(LINKER_INITIALIZED);
 
 static void PrintAccumulatedStats() {
   AsanStats stats;
   asanThreadRegistry().GetAccumulatedStats(&stats);
   // Use lock to keep reports from mixing up.
-  ScopedLock lock(&print_lock);
+  BlockingMutexLock lock(&print_lock);
   stats.Print();
+  StackDepotStats *stack_depot_stats = StackDepotGetStats();
+  Printf("Stats: StackDepot: %zd ids; %zdM mapped\n",
+         stack_depot_stats->n_uniq_ids, stack_depot_stats->mapped >> 20);
+  PrintInternalAllocatorStats();
 }
 
 }  // namespace __asan
diff --git a/lib/asan/asan_stats.h b/lib/asan/asan_stats.h
index 0c02b3a..37846bc 100644
--- a/lib/asan/asan_stats.h
+++ b/lib/asan/asan_stats.h
@@ -37,6 +37,8 @@
   uptr realloced;
   uptr mmaps;
   uptr mmaped;
+  uptr munmaps;
+  uptr munmaped;
   uptr mmaped_by_size[kNumberOfSizeClasses];
   uptr malloced_by_size[kNumberOfSizeClasses];
   uptr freed_by_size[kNumberOfSizeClasses];
diff --git a/lib/asan/asan_thread.cc b/lib/asan/asan_thread.cc
index a77e435..778e919 100644
--- a/lib/asan/asan_thread.cc
+++ b/lib/asan/asan_thread.cc
@@ -74,7 +74,7 @@
 void AsanThread::Init() {
   SetThreadStackTopAndBottom();
   CHECK(AddrIsInMem(stack_bottom_));
-  CHECK(AddrIsInMem(stack_top_));
+  CHECK(AddrIsInMem(stack_top_ - 1));
   ClearShadowForThreadStack();
   if (flags()->verbosity >= 1) {
     int local = 0;
diff --git a/lib/asan/asan_thread_registry.cc b/lib/asan/asan_thread_registry.cc
index 0e07e19..8067540 100644
--- a/lib/asan/asan_thread_registry.cc
+++ b/lib/asan/asan_thread_registry.cc
@@ -44,7 +44,7 @@
 }
 
 void AsanThreadRegistry::RegisterThread(AsanThread *thread) {
-  ScopedLock lock(&mu_);
+  BlockingMutexLock lock(&mu_);
   u32 tid = n_threads_;
   n_threads_++;
   CHECK(n_threads_ < kMaxNumberOfThreads);
@@ -56,7 +56,7 @@
 }
 
 void AsanThreadRegistry::UnregisterThread(AsanThread *thread) {
-  ScopedLock lock(&mu_);
+  BlockingMutexLock lock(&mu_);
   FlushToAccumulatedStatsUnlocked(&thread->stats());
   AsanThreadSummary *summary = thread->summary();
   CHECK(summary);
@@ -105,13 +105,13 @@
 }
 
 void AsanThreadRegistry::GetAccumulatedStats(AsanStats *stats) {
-  ScopedLock lock(&mu_);
+  BlockingMutexLock lock(&mu_);
   UpdateAccumulatedStatsUnlocked();
   internal_memcpy(stats, &accumulated_stats_, sizeof(accumulated_stats_));
 }
 
 uptr AsanThreadRegistry::GetCurrentAllocatedBytes() {
-  ScopedLock lock(&mu_);
+  BlockingMutexLock lock(&mu_);
   UpdateAccumulatedStatsUnlocked();
   uptr malloced = accumulated_stats_.malloced;
   uptr freed = accumulated_stats_.freed;
@@ -121,15 +121,16 @@
 }
 
 uptr AsanThreadRegistry::GetHeapSize() {
-  ScopedLock lock(&mu_);
+  BlockingMutexLock lock(&mu_);
   UpdateAccumulatedStatsUnlocked();
-  return accumulated_stats_.mmaped;
+  return accumulated_stats_.mmaped - accumulated_stats_.munmaped;
 }
 
 uptr AsanThreadRegistry::GetFreeBytes() {
-  ScopedLock lock(&mu_);
+  BlockingMutexLock lock(&mu_);
   UpdateAccumulatedStatsUnlocked();
   uptr total_free = accumulated_stats_.mmaped
+                  - accumulated_stats_.munmaped
                   + accumulated_stats_.really_freed
                   + accumulated_stats_.really_freed_redzones;
   uptr total_used = accumulated_stats_.malloced
@@ -142,7 +143,7 @@
 // Return several stats counters with a single call to
 // UpdateAccumulatedStatsUnlocked().
 void AsanThreadRegistry::FillMallocStatistics(AsanMallocStats *malloc_stats) {
-  ScopedLock lock(&mu_);
+  BlockingMutexLock lock(&mu_);
   UpdateAccumulatedStatsUnlocked();
   malloc_stats->blocks_in_use = accumulated_stats_.mallocs;
   malloc_stats->size_in_use = accumulated_stats_.malloced;
@@ -157,7 +158,7 @@
 }
 
 AsanThread *AsanThreadRegistry::FindThreadByStackAddress(uptr addr) {
-  ScopedLock lock(&mu_);
+  BlockingMutexLock lock(&mu_);
   for (u32 tid = 0; tid < n_threads_; tid++) {
     AsanThread *t = thread_summaries_[tid]->thread();
     if (!t || !(t->fake_stack().StackSize())) continue;
diff --git a/lib/asan/asan_thread_registry.h b/lib/asan/asan_thread_registry.h
index 2056e73..adb1a6d 100644
--- a/lib/asan/asan_thread_registry.h
+++ b/lib/asan/asan_thread_registry.h
@@ -15,10 +15,10 @@
 #ifndef ASAN_THREAD_REGISTRY_H
 #define ASAN_THREAD_REGISTRY_H
 
-#include "asan_lock.h"
 #include "asan_stack.h"
 #include "asan_stats.h"
 #include "asan_thread.h"
+#include "sanitizer_common/sanitizer_mutex.h"
 
 namespace __asan {
 
@@ -73,7 +73,7 @@
   // per-thread AsanStats.
   uptr max_malloced_memory_;
   u32 n_threads_;
-  AsanLock mu_;
+  BlockingMutex mu_;
   bool inited_;
 };
 
diff --git a/lib/asan/asan_win.cc b/lib/asan/asan_win.cc
index e620c71..d8ce050 100644
--- a/lib/asan/asan_win.cc
+++ b/lib/asan/asan_win.cc
@@ -17,22 +17,21 @@
 #include <dbghelp.h>
 #include <stdlib.h>
 
-#include <new>  // FIXME: temporarily needed for placement new in AsanLock.
-
 #include "asan_interceptors.h"
 #include "asan_internal.h"
-#include "asan_lock.h"
 #include "asan_thread.h"
 #include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_mutex.h"
 
 namespace __asan {
 
 // ---------------------- Stacktraces, symbols, etc. ---------------- {{{1
-static AsanLock dbghelp_lock(LINKER_INITIALIZED);
+static BlockingMutex dbghelp_lock(LINKER_INITIALIZED);
 static bool dbghelp_initialized = false;
 #pragma comment(lib, "dbghelp.lib")
 
-void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp) {
+void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp, bool fast) {
+  (void)fast;
   stack->max_size = max_s;
   void *tmp[kStackTraceMax];
 
@@ -55,42 +54,6 @@
     stack->trace[i] = (uptr)tmp[i + offset];
 }
 
-// ---------------------- AsanLock ---------------- {{{1
-enum LockState {
-  LOCK_UNINITIALIZED = 0,
-  LOCK_READY = -1,
-};
-
-AsanLock::AsanLock(LinkerInitialized li) {
-  // FIXME: see comments in AsanLock::Lock() for the details.
-  CHECK(li == LINKER_INITIALIZED || owner_ == LOCK_UNINITIALIZED);
-
-  CHECK(sizeof(CRITICAL_SECTION) <= sizeof(opaque_storage_));
-  InitializeCriticalSection((LPCRITICAL_SECTION)opaque_storage_);
-  owner_ = LOCK_READY;
-}
-
-void AsanLock::Lock() {
-  if (owner_ == LOCK_UNINITIALIZED) {
-    // FIXME: hm, global AsanLock objects are not initialized?!?
-    // This might be a side effect of the clang+cl+link Frankenbuild...
-    new(this) AsanLock((LinkerInitialized)(LINKER_INITIALIZED + 1));
-
-    // FIXME: If it turns out the linker doesn't invoke our
-    // constructors, we should probably manually Lock/Unlock all the global
-    // locks while we're starting in one thread to avoid double-init races.
-  }
-  EnterCriticalSection((LPCRITICAL_SECTION)opaque_storage_);
-  CHECK(owner_ == LOCK_READY);
-  owner_ = GetThreadSelf();
-}
-
-void AsanLock::Unlock() {
-  CHECK(owner_ == GetThreadSelf());
-  owner_ = LOCK_READY;
-  LeaveCriticalSection((LPCRITICAL_SECTION)opaque_storage_);
-}
-
 // ---------------------- TSD ---------------- {{{1
 static bool tsd_key_inited = false;
 
@@ -139,7 +102,7 @@
   // Nothing here for now.
 }
 
-void ClearShadowMemoryForContext(void *context) {
+void ReadContextStack(void *context, uptr *stack, uptr *ssize) {
   UNIMPLEMENTED();
 }
 
@@ -151,7 +114,7 @@
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE NOINLINE
 bool __asan_symbolize(const void *addr, char *out_buffer, int buffer_size) {
-  ScopedLock lock(&dbghelp_lock);
+  BlockingMutexLock lock(&dbghelp_lock);
   if (!dbghelp_initialized) {
     SymSetOptions(SYMOPT_DEFERRED_LOADS |
                   SYMOPT_UNDNAME |
diff --git a/lib/asan/dynamic/Makefile.mk b/lib/asan/dynamic/Makefile.mk
deleted file mode 100644
index 897844e..0000000
--- a/lib/asan/dynamic/Makefile.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-#===- lib/asan/dynamic/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := asan_dynamic
-SubDirs :=
-
-Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
-ObjNames := $(Sources:%.cc=%.o)
-
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
-Dependencies += $(wildcard $(Dir)/../../interception/*.h)
-Dependencies += $(wildcard $(Dir)/../../interception/mach_override/*.h)
-Dependencies += $(wildcard $(Dir)/../../sanitizer_common/*.h)
-
-# Define a convenience variable for the asan dynamic functions.
-AsanDynamicFunctions := $(Sources:%.cc=%)
diff --git a/lib/asan/dynamic/asan_interceptors_dynamic.cc b/lib/asan/dynamic/asan_interceptors_dynamic.cc
deleted file mode 100644
index 4f0f7bd..0000000
--- a/lib/asan/dynamic/asan_interceptors_dynamic.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-//===-- asan_interceptors_dynamic.cc --------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of AddressSanitizer, an address sanity checker.
-//
-// __DATA,__interpose section of the dynamic runtime library for Mac OS.
-//===----------------------------------------------------------------------===//
-
-#if defined(__APPLE__)
-
-#include "../asan_interceptors.h"
-#include "../asan_intercepted_functions.h"
-
-namespace __asan {
-
-#if !MAC_INTERPOSE_FUNCTIONS
-# error \
-  Dynamic interposing library should be built with -DMAC_INTERPOSE_FUNCTIONS
-#endif
-
-#define INTERPOSE_FUNCTION(function) \
-    { reinterpret_cast<const uptr>(WRAP(function)), \
-      reinterpret_cast<const uptr>(function) }
-
-#define INTERPOSE_FUNCTION_2(function, wrapper) \
-    { reinterpret_cast<const uptr>(wrapper), \
-      reinterpret_cast<const uptr>(function) }
-
-struct interpose_substitution {
-  const uptr replacement;
-  const uptr original;
-};
-
-__attribute__((used))
-const interpose_substitution substitutions[]
-    __attribute__((section("__DATA, __interpose"))) = {
-  INTERPOSE_FUNCTION(strlen),
-  INTERPOSE_FUNCTION(memcmp),
-  INTERPOSE_FUNCTION(memcpy),
-  INTERPOSE_FUNCTION(memmove),
-  INTERPOSE_FUNCTION(memset),
-  INTERPOSE_FUNCTION(strchr),
-  INTERPOSE_FUNCTION(strcat),
-  INTERPOSE_FUNCTION(strncat),
-  INTERPOSE_FUNCTION(strcpy),
-  INTERPOSE_FUNCTION(strncpy),
-  INTERPOSE_FUNCTION(pthread_create),
-  INTERPOSE_FUNCTION(longjmp),
-#if ASAN_INTERCEPT__LONGJMP
-  INTERPOSE_FUNCTION(_longjmp),
-#endif
-#if ASAN_INTERCEPT_SIGLONGJMP
-  INTERPOSE_FUNCTION(siglongjmp),
-#endif
-#if ASAN_INTERCEPT_STRDUP
-  INTERPOSE_FUNCTION(strdup),
-#endif
-#if ASAN_INTERCEPT_STRNLEN
-  INTERPOSE_FUNCTION(strnlen),
-#endif
-#if ASAN_INTERCEPT_INDEX
-  INTERPOSE_FUNCTION_2(index, WRAP(strchr)),
-#endif
-  INTERPOSE_FUNCTION(strcmp),
-  INTERPOSE_FUNCTION(strncmp),
-#if ASAN_INTERCEPT_STRCASECMP_AND_STRNCASECMP
-  INTERPOSE_FUNCTION(strcasecmp),
-  INTERPOSE_FUNCTION(strncasecmp),
-#endif
-  INTERPOSE_FUNCTION(atoi),
-  INTERPOSE_FUNCTION(atol),
-  INTERPOSE_FUNCTION(strtol),
-#if ASAN_INTERCEPT_ATOLL_AND_STRTOLL
-  INTERPOSE_FUNCTION(atoll),
-  INTERPOSE_FUNCTION(strtoll),
-#endif
-#if ASAN_INTERCEPT_MLOCKX
-  INTERPOSE_FUNCTION(mlock),
-  INTERPOSE_FUNCTION(munlock),
-  INTERPOSE_FUNCTION(mlockall),
-  INTERPOSE_FUNCTION(munlockall),
-#endif
-  INTERPOSE_FUNCTION(dispatch_async_f),
-  INTERPOSE_FUNCTION(dispatch_sync_f),
-  INTERPOSE_FUNCTION(dispatch_after_f),
-  INTERPOSE_FUNCTION(dispatch_barrier_async_f),
-  INTERPOSE_FUNCTION(dispatch_group_async_f),
-#ifndef MISSING_BLOCKS_SUPPORT
-  INTERPOSE_FUNCTION(dispatch_group_async),
-  INTERPOSE_FUNCTION(dispatch_async),
-  INTERPOSE_FUNCTION(dispatch_after),
-  INTERPOSE_FUNCTION(dispatch_source_set_event_handler),
-  INTERPOSE_FUNCTION(dispatch_source_set_cancel_handler),
-#endif
-  INTERPOSE_FUNCTION(signal),
-  INTERPOSE_FUNCTION(sigaction),
-
-  INTERPOSE_FUNCTION(__CFInitialize),
-  INTERPOSE_FUNCTION(CFStringCreateCopy),
-  INTERPOSE_FUNCTION(free),
-};
-
-}  // namespace __asan
-
-#endif  // __APPLE__
diff --git a/lib/asan/lit_tests/CMakeLists.txt b/lib/asan/lit_tests/CMakeLists.txt
index afe76a9..1609032 100644
--- a/lib/asan/lit_tests/CMakeLists.txt
+++ b/lib/asan/lit_tests/CMakeLists.txt
@@ -11,9 +11,8 @@
   ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
   )
 
-if("${CMAKE_HOST_SYSTEM}" STREQUAL "${CMAKE_SYSTEM}")
-  # Run ASan output tests only if we're not cross-compiling,
-  # and can be sure that clang would produce working binaries.
+if(COMPILER_RT_CAN_EXECUTE_TESTS)
+  # Run ASan tests only if we're sure we may produce working binaries.
   set(ASAN_TEST_DEPS
     clang clang-headers FileCheck count not llvm-nm llvm-symbolizer
     ${ASAN_RUNTIME_LIBRARIES}
@@ -30,11 +29,4 @@
     DEPENDS ${ASAN_TEST_DEPS}
     )
   set_target_properties(check-asan PROPERTIES FOLDER "ASan tests")
-elseif(LLVM_INCLUDE_TESTS)
-  # Otherwise run only ASan unit tests.
-  add_lit_testsuite(check-asan "Running the AddressSanitizer unit tests"
-    ${CMAKE_CURRENT_BINARY_DIR}/Unit
-    DEPENDS AsanUnitTests
-    )
-  set_target_properties(check-asan PROPERTIES FOLDER "ASan unit tests")
 endif()
diff --git a/lib/asan/lit_tests/Darwin/interface_symbols_darwin.c b/lib/asan/lit_tests/Darwin/interface_symbols_darwin.c
new file mode 100644
index 0000000..18bba70
--- /dev/null
+++ b/lib/asan/lit_tests/Darwin/interface_symbols_darwin.c
@@ -0,0 +1,39 @@
+// Check the presence of interface symbols in the ASan runtime dylib.
+// If you're changing this file, please also change
+// ../Linux/interface_symbols_linux.c
+
+// RUN: %clang -fsanitize=address -dead_strip -O2 %s -o %t.exe
+// RUN: rm -f %t.symbols %t.interface
+
+// RUN: nm `otool -L %t.exe | grep "asan_osx_dynamic.dylib" | \
+// RUN:                       tr -d '\011' | \
+// RUN:                       sed "s/.dylib.*/.dylib/"` \
+// RUN:   | grep " T " | sed "s/.* T //" \
+// RUN:   | grep "__asan_" | sed "s/___asan_/__asan_/" \
+// RUN:   | grep -v "__asan_malloc_hook" \
+// RUN:   | grep -v "__asan_free_hook" \
+// RUN:   | grep -v "__asan_symbolize" \
+// RUN:   | grep -v "__asan_default_options" \
+// RUN:   | grep -v "__asan_on_error" > %t.symbols
+
+// RUN: cat %p/../../asan_interface_internal.h \
+// RUN:    | sed "s/\/\/.*//" | sed "s/typedef.*//" \
+// RUN:    | grep -v "OPTIONAL" \
+// RUN:    | grep "__asan_.*(" | sed "s/.* __asan_/__asan_/;s/(.*//" \
+// RUN:    > %t.interface
+// RUN: echo __asan_report_load1 >> %t.interface
+// RUN: echo __asan_report_load2 >> %t.interface
+// RUN: echo __asan_report_load4 >> %t.interface
+// RUN: echo __asan_report_load8 >> %t.interface
+// RUN: echo __asan_report_load16 >> %t.interface
+// RUN: echo __asan_report_store1 >> %t.interface
+// RUN: echo __asan_report_store2 >> %t.interface
+// RUN: echo __asan_report_store4 >> %t.interface
+// RUN: echo __asan_report_store8 >> %t.interface
+// RUN: echo __asan_report_store16 >> %t.interface
+// RUN: echo __asan_report_load_n >> %t.interface
+// RUN: echo __asan_report_store_n >> %t.interface
+
+// RUN: cat %t.interface | sort -u | diff %t.symbols -
+
+int main() { return 0; }
diff --git a/lib/asan/lit_tests/Darwin/lit.local.cfg b/lib/asan/lit_tests/Darwin/lit.local.cfg
new file mode 100644
index 0000000..a85dfcd
--- /dev/null
+++ b/lib/asan/lit_tests/Darwin/lit.local.cfg
@@ -0,0 +1,9 @@
+def getRoot(config):
+  if not config.parent:
+    return config
+  return getRoot(config.parent)
+
+root = getRoot(config)
+
+if root.host_os not in ['Darwin']:
+  config.unsupported = True
diff --git a/lib/asan/lit_tests/Darwin/reexec-insert-libraries-env.cc b/lib/asan/lit_tests/Darwin/reexec-insert-libraries-env.cc
new file mode 100644
index 0000000..40a459f
--- /dev/null
+++ b/lib/asan/lit_tests/Darwin/reexec-insert-libraries-env.cc
@@ -0,0 +1,20 @@
+// Make sure ASan doesn't hang in an exec loop if DYLD_INSERT_LIBRARIES is set.
+// This is a regression test for
+// https://code.google.com/p/address-sanitizer/issues/detail?id=159
+
+// RUN: %clangxx_asan -m64 %s -o %t
+// RUN: %clangxx -m64 %p/../SharedLibs/darwin-dummy-shared-lib-so.cc \
+// RUN:     -dynamiclib -o darwin-dummy-shared-lib-so.dylib
+
+// FIXME: the following command line may hang in the case of a regression.
+// RUN: DYLD_INSERT_LIBRARIES=darwin-dummy-shared-lib-so.dylib \
+// RUN:     %t 2>&1 | FileCheck %s || exit 1
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  const char kEnvName[] = "DYLD_INSERT_LIBRARIES";
+  printf("%s=%s\n", kEnvName, getenv(kEnvName));
+  // CHECK: {{DYLD_INSERT_LIBRARIES=.*darwin-dummy-shared-lib-so.dylib.*}}
+  return 0;
+}
diff --git a/lib/asan/lit_tests/Darwin/unset-insert-libraries-on-exec.cc b/lib/asan/lit_tests/Darwin/unset-insert-libraries-on-exec.cc
new file mode 100644
index 0000000..cf89949
--- /dev/null
+++ b/lib/asan/lit_tests/Darwin/unset-insert-libraries-on-exec.cc
@@ -0,0 +1,20 @@
+// Make sure ASan removes the runtime library from DYLD_INSERT_LIBRARIES before
+// executing other programs.
+
+// RUN: %clangxx_asan -m64 %s -o %t
+// RUN: %clangxx -m64 %p/../SharedLibs/darwin-dummy-shared-lib-so.cc \
+// RUN:     -dynamiclib -o darwin-dummy-shared-lib-so.dylib
+
+// Make sure DYLD_INSERT_LIBRARIES doesn't contain the runtime library before
+// execl().
+
+// RUN: %t >/dev/null 2>&1
+// RUN: DYLD_INSERT_LIBRARIES=darwin-dummy-shared-lib-so.dylib \
+// RUN:     %t 2>&1 | FileCheck %s || exit 1
+#include <unistd.h>
+int main() {
+  execl("/bin/bash", "/bin/bash", "-c",
+        "echo DYLD_INSERT_LIBRARIES=$DYLD_INSERT_LIBRARIES", NULL);
+  // CHECK:  {{DYLD_INSERT_LIBRARIES=.*darwin-dummy-shared-lib-so.dylib.*}}
+  return 0;
+}
diff --git a/lib/asan/lit_tests/Linux/asan_prelink_test.cc b/lib/asan/lit_tests/Linux/asan_prelink_test.cc
new file mode 100644
index 0000000..522c191
--- /dev/null
+++ b/lib/asan/lit_tests/Linux/asan_prelink_test.cc
@@ -0,0 +1,26 @@
+// Test if asan works with prelink.
+// It does not actually use prelink, but relies on ld's flag -Ttext-segment
+// or gold's flag -Ttext (we try the first flag first, if that fails we
+// try the second flag).
+//
+// RUN: %clangxx_asan -m64 -c %s -o %t.o
+// RUN: %clangxx_asan -m64 -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext-segment=0x3600000000 ||\
+// RUN: %clangxx_asan -m64 -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext=0x3600000000
+// RUN: %clangxx_asan -m64 %t.o %t.so -Wl,-R. -o %t
+// RUN: ASAN_OPTIONS=verbosity=1 %t 2>&1 | FileCheck %s
+#if BUILD_SO
+int G;
+int *getG() {
+  return &G;
+}
+#else
+#include <stdio.h>
+extern int *getG();
+int main(int argc, char **argv) {
+  long p = (long)getG();
+  printf("SO mapped at %lx\n", p & ~0xffffffffUL);
+  *getG() = 0;
+}
+#endif
+// CHECK: 0x003000000000, 0x004fffffffff{{.*}} MidMem
+// CHECK: SO mapped at 3600000000
diff --git a/lib/asan/lit_tests/Linux/initialization-bug-any-order.cc b/lib/asan/lit_tests/Linux/initialization-bug-any-order.cc
index c43b1f5..645fe1c 100644
--- a/lib/asan/lit_tests/Linux/initialization-bug-any-order.cc
+++ b/lib/asan/lit_tests/Linux/initialization-bug-any-order.cc
@@ -22,7 +22,7 @@
   return y + 1;
   // CHECK: {{AddressSanitizer: initialization-order-fiasco}}
   // CHECK: {{READ of size .* at 0x.* thread T0}}
-  // CHECK: {{#0 0x.* in .*initX.* .*initialization-bug-any-order.cc:22}}
+  // CHECK: {{#0 0x.* in .*initX.* .*initialization-bug-any-order.cc:}}[[@LINE-3]]
   // CHECK: {{0x.* is located 0 bytes inside of global variable .*y.*}}
 }
 
diff --git a/lib/asan/lit_tests/interface_symbols.c b/lib/asan/lit_tests/Linux/interface_symbols_linux.c
similarity index 84%
rename from lib/asan/lit_tests/interface_symbols.c
rename to lib/asan/lit_tests/Linux/interface_symbols_linux.c
index f3167f5..6ea61e6 100644
--- a/lib/asan/lit_tests/interface_symbols.c
+++ b/lib/asan/lit_tests/Linux/interface_symbols_linux.c
@@ -1,6 +1,6 @@
 // Check the presense of interface symbols in compiled file.
 
-// RUN: %clang -fsanitize=address -dead_strip -O2 %s -o %t.exe
+// RUN: %clang -fsanitize=address -O2 %s -o %t.exe
 // RUN: nm %t.exe | grep " T " | sed "s/.* T //" \
 // RUN:    | grep "__asan_" | sed "s/___asan_/__asan_/" \
 // RUN:    | grep -v "__asan_malloc_hook" \
@@ -8,7 +8,7 @@
 // RUN:    | grep -v "__asan_symbolize" \
 // RUN:    | grep -v "__asan_default_options" \
 // RUN:    | grep -v "__asan_on_error" > %t.symbols
-// RUN: cat %p/../../../include/sanitizer/asan_interface.h \
+// RUN: cat %p/../../asan_interface_internal.h \
 // RUN:    | sed "s/\/\/.*//" | sed "s/typedef.*//" \
 // RUN:    | grep -v "OPTIONAL" \
 // RUN:    | grep "__asan_.*(" | sed "s/.* __asan_/__asan_/;s/(.*//" \
@@ -23,6 +23,8 @@
 // RUN: echo __asan_report_store4 >> %t.interface
 // RUN: echo __asan_report_store8 >> %t.interface
 // RUN: echo __asan_report_store16 >> %t.interface
+// RUN: echo __asan_report_load_n >> %t.interface
+// RUN: echo __asan_report_store_n >> %t.interface
 // RUN: cat %t.interface | sort -u | diff %t.symbols -
 
 int main() { return 0; }
diff --git a/lib/asan/lit_tests/Linux/malloc-in-qsort.cc b/lib/asan/lit_tests/Linux/malloc-in-qsort.cc
new file mode 100644
index 0000000..a3fa255
--- /dev/null
+++ b/lib/asan/lit_tests/Linux/malloc-in-qsort.cc
@@ -0,0 +1,50 @@
+// RUN: %clangxx_asan -O2 %s -o %t
+// RUN: ASAN_OPTIONS=fast_unwind_on_malloc=1 %t 2>&1 | %symbolize | FileCheck %s --check-prefix=CHECK-FAST
+// RUN: ASAN_OPTIONS=fast_unwind_on_malloc=0 %t 2>&1 | %symbolize | FileCheck %s --check-prefix=CHECK-SLOW
+
+// Test how well we unwind in presence of qsort in the stack
+// (i.e. if we can unwind through a function compiled w/o frame pointers).
+// https://code.google.com/p/address-sanitizer/issues/detail?id=137
+#include <stdlib.h>
+#include <stdio.h>
+
+int *GlobalPtr;
+
+extern "C" {
+int QsortCallback(const void *a, const void *b) {
+  char *x = (char*)a;
+  char *y = (char*)b;
+  printf("Calling QsortCallback\n");
+  GlobalPtr = new int[10];
+  return (int)*x - (int)*y;
+}
+
+__attribute__((noinline))
+void MyQsort(char *a, size_t size) {
+  printf("Calling qsort\n");
+  qsort(a, size, sizeof(char), QsortCallback);
+  printf("Done\n");  // Avoid tail call.
+}
+}  // extern "C"
+
+int main() {
+  char a[2] = {1, 2};
+  MyQsort(a, 2);
+  return GlobalPtr[10];
+}
+
+// Fast unwind: cannot unwind through qsort.
+// FIXME: this test does not properly work with slow unwind yet.
+
+// CHECK-FAST: ERROR: AddressSanitizer: heap-buffer-overflow
+// CHECK-FAST: is located 0 bytes to the right
+// CHECK-FAST: #0{{.*}}operator new
+// CHECK-FAST-NEXT: #1{{.*}}QsortCallback
+// CHECK-FAST-NOT: MyQsort
+//
+// CHECK-SLOW: ERROR: AddressSanitizer: heap-buffer-overflow
+// CHECK-SLOW: is located 0 bytes to the right
+// CHECK-SLOW: #0{{.*}}operator new
+// CHECK-SLOW-NEXT: #1{{.*}}QsortCallback
+// CHECK-SLOW: #{{.*}}MyQsort
+// CHECK-SLOW-NEXT: #{{.*}}main
diff --git a/lib/asan/lit_tests/Linux/malloc_delete_mismatch.cc b/lib/asan/lit_tests/Linux/malloc_delete_mismatch.cc
new file mode 100644
index 0000000..f34b33a
--- /dev/null
+++ b/lib/asan/lit_tests/Linux/malloc_delete_mismatch.cc
@@ -0,0 +1,26 @@
+// Check that we detect malloc/delete mismatch only if the appropriate flag
+// is set.
+
+// RUN: %clangxx_asan -g %s -o %t 2>&1
+// RUN: ASAN_OPTIONS=alloc_dealloc_mismatch=1 %t 2>&1 | \
+// RUN: %symbolize | FileCheck %s
+
+// No error here.
+// RUN: ASAN_OPTIONS=alloc_dealloc_mismatch=0 %t
+#include <stdlib.h>
+
+static volatile char *x;
+
+int main() {
+  x = (char*)malloc(10);
+  x[0] = 0;
+  delete x;
+}
+// CHECK: ERROR: AddressSanitizer: alloc-dealloc-mismatch (malloc vs operator delete) on 0x
+// CHECK-NEXT: #0{{.*}}operator delete
+// CHECK: #{{.*}}main
+// CHECK: is located 0 bytes inside of 10-byte region
+// CHECK-NEXT: allocated by thread T0 here:
+// CHECK-NEXT: #0{{.*}}malloc
+// CHECK: #{{.*}}main
+// CHECK: HINT: {{.*}} you may set ASAN_OPTIONS=alloc_dealloc_mismatch=0
diff --git a/lib/asan/lit_tests/Linux/overflow-in-qsort.cc b/lib/asan/lit_tests/Linux/overflow-in-qsort.cc
new file mode 100644
index 0000000..c298991
--- /dev/null
+++ b/lib/asan/lit_tests/Linux/overflow-in-qsort.cc
@@ -0,0 +1,47 @@
+// RUN: %clangxx_asan -O2 %s -o %t
+// RUN: ASAN_OPTIONS=fast_unwind_on_fatal=1 %t 2>&1 | %symbolize | FileCheck %s --check-prefix=CHECK-FAST
+// RUN: ASAN_OPTIONS=fast_unwind_on_fatal=0 %t 2>&1 | %symbolize | FileCheck %s --check-prefix=CHECK-SLOW
+
+// Test how well we unwind in presence of qsort in the stack
+// (i.e. if we can unwind through a function compiled w/o frame pointers).
+// https://code.google.com/p/address-sanitizer/issues/detail?id=137
+#include <stdlib.h>
+#include <stdio.h>
+
+int global_array[10];
+volatile int one = 1;
+
+extern "C" {
+int QsortCallback(const void *a, const void *b) {
+  char *x = (char*)a;
+  char *y = (char*)b;
+  printf("Calling QsortCallback\n");
+  global_array[one * 10] = 0;  // BOOM
+  return (int)*x - (int)*y;
+}
+
+__attribute__((noinline))
+void MyQsort(char *a, size_t size) {
+  printf("Calling qsort\n");
+  qsort(a, size, sizeof(char), QsortCallback);
+  printf("Done\n");  // Avoid tail call.
+}
+}  // extern "C"
+
+int main() {
+  char a[2] = {1, 2};
+  MyQsort(a, 2);
+}
+
+// Fast unwind: cannot unwind through qsort.
+
+// CHECK-FAST: ERROR: AddressSanitizer: global-buffer-overflow
+// CHECK-FAST: #0{{.*}} in QsortCallback
+// CHECK-FAST-NOT: MyQsort
+// CHECK-FAST: is located 0 bytes to the right of global variable 'global_array
+
+// CHECK-SLOW: ERROR: AddressSanitizer: global-buffer-overflow
+// CHECK-SLOW: #0{{.*}} in QsortCallback
+// CHECK-SLOW: #{{.*}} in MyQsort
+// CHECK-SLOW: #{{.*}} in main
+// CHECK-SLOW: is located 0 bytes to the right of global variable 'global_array
diff --git a/lib/asan/lit_tests/Linux/preinit_test.cc b/lib/asan/lit_tests/Linux/preinit_test.cc
new file mode 100644
index 0000000..28e5094
--- /dev/null
+++ b/lib/asan/lit_tests/Linux/preinit_test.cc
@@ -0,0 +1,27 @@
+// RUN: %clangxx      -DFUNC=zzzz %s -shared -o %t.so -fPIC
+// RUN: %clangxx_asan -DFUNC=main %s         -o %t    -Wl,-R. %t.so
+// RUN: %t
+
+// This test ensures that we call __asan_init early enough.
+// We build a shared library w/o asan instrumentation
+// and the binary with asan instrumentation.
+// Both files include the same header (emulated by -DFUNC here)
+// with C++ template magic which runs global initializer at library load time.
+// The function get() is instrumented with asan, but called
+// before the usual constructors are run.
+// So, we must make sure that __asan_init is executed even earlier.
+//
+// See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56393
+
+struct A {
+  int foo() const { return 0; }
+};
+A get () { return A(); }
+template <class> struct O {
+  static A const e;
+};
+template <class T> A const O <T>::e = get();
+int FUNC() {
+  return O<int>::e.foo();
+}
+
diff --git a/lib/asan/lit_tests/Linux/rlimit_mmap_test.cc b/lib/asan/lit_tests/Linux/rlimit_mmap_test.cc
index 5026e24..8679475 100644
--- a/lib/asan/lit_tests/Linux/rlimit_mmap_test.cc
+++ b/lib/asan/lit_tests/Linux/rlimit_mmap_test.cc
@@ -11,6 +11,6 @@
   struct rlimit mmap_resource_limit = { 0, 0 };
   assert(0 == setrlimit(RLIMIT_AS, &mmap_resource_limit));
   x = malloc(10000000);
-// CHECK: AddressSanitizer is unable to mmap
+// CHECK: ERROR: Failed to mmap
   return 0;
 }
diff --git a/lib/asan/lit_tests/Linux/zero-base-shadow.cc b/lib/asan/lit_tests/Linux/zero-base-shadow.cc
new file mode 100644
index 0000000..d6ea1aa
--- /dev/null
+++ b/lib/asan/lit_tests/Linux/zero-base-shadow.cc
@@ -0,0 +1,27 @@
+// RUN: %clangxx_asan -m64 -O0 -fsanitize-address-zero-base-shadow -fPIE -pie %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-64 < %t.out
+// RUN: %clangxx_asan -m64 -O1 -fsanitize-address-zero-base-shadow -fPIE -pie %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-64 < %t.out
+// RUN: %clangxx_asan -m64 -O2 -fsanitize-address-zero-base-shadow -fPIE -pie %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-64 < %t.out
+// RUN: %clangxx_asan -m32 -O0 -fsanitize-address-zero-base-shadow -fPIE -pie %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-32 < %t.out
+// RUN: %clangxx_asan -m32 -O1 -fsanitize-address-zero-base-shadow -fPIE -pie %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-32 < %t.out
+// RUN: %clangxx_asan -m32 -O2 -fsanitize-address-zero-base-shadow -fPIE -pie %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-32 < %t.out
+
+#include <string.h>
+int main(int argc, char **argv) {
+  char x[10];
+  memset(x, 0, 10);
+  int res = x[argc * 10];  // BOOOM
+  // CHECK: {{READ of size 1 at 0x.* thread T0}}
+  // CHECK: {{    #0 0x.* in _?main .*zero-base-shadow.cc:}}[[@LINE-2]]
+  // CHECK: {{Address 0x.* is .* frame <main>}}
+
+  // Check that shadow for stack memory occupies lower part of address space.
+  // CHECK-64: =>0x0f{{.*}}
+  // CHECK-32: =>0x1f{{.*}}
+  return res;
+}
diff --git a/lib/asan/lit_tests/SharedLibs/darwin-dummy-shared-lib-so.cc b/lib/asan/lit_tests/SharedLibs/darwin-dummy-shared-lib-so.cc
new file mode 100644
index 0000000..5d93999
--- /dev/null
+++ b/lib/asan/lit_tests/SharedLibs/darwin-dummy-shared-lib-so.cc
@@ -0,0 +1,13 @@
+//===----------- darwin-dummy-shared-lib-so.cc ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+void foo() {}
diff --git a/lib/asan/lit_tests/Unit/lit.site.cfg.in b/lib/asan/lit_tests/Unit/lit.site.cfg.in
index 401c3a8..07584a6 100644
--- a/lib/asan/lit_tests/Unit/lit.site.cfg.in
+++ b/lib/asan/lit_tests/Unit/lit.site.cfg.in
@@ -3,8 +3,14 @@
 
 config.target_triple = "@TARGET_TRIPLE@"
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
-config.build_type = "@CMAKE_BUILD_TYPE@"
+config.llvm_build_mode = "@LLVM_BUILD_MODE@"
 config.asan_binary_dir = "@ASAN_BINARY_DIR@"
 
+try:
+  config.llvm_build_mode = config.llvm_build_mode % lit.params
+except KeyError,e:
+  key, = e.args
+  lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key, key))
+
 # Let the main config do the real work.
 lit.load_config(config, "@ASAN_SOURCE_DIR@/lit_tests/Unit/lit.cfg")
diff --git a/lib/asan/lit_tests/deep_stack_uaf.cc b/lib/asan/lit_tests/deep_stack_uaf.cc
index e4481be..7b32798 100644
--- a/lib/asan/lit_tests/deep_stack_uaf.cc
+++ b/lib/asan/lit_tests/deep_stack_uaf.cc
@@ -25,7 +25,7 @@
 };
 
 int main() {
-  char *x = new char[10];
+  char *x = (char*)malloc(10);
   // deep_free(x);
   DeepFree<200>::free(x);
   return x[5];
diff --git a/lib/asan/lit_tests/default_options.cc b/lib/asan/lit_tests/default_options.cc
index 950a7d8..84b8055 100644
--- a/lib/asan/lit_tests/default_options.cc
+++ b/lib/asan/lit_tests/default_options.cc
@@ -4,7 +4,7 @@
 const char *kAsanDefaultOptions="verbosity=1 foo=bar";
 
 extern "C"
-__attribute__((no_address_safety_analysis))
+__attribute__((no_sanitize_address))
 const char *__asan_default_options() {
   // CHECK: Using the defaults from __asan_default_options: {{.*}} foo=bar
   return kAsanDefaultOptions;
diff --git a/lib/asan/lit_tests/global-overflow.cc b/lib/asan/lit_tests/global-overflow.cc
index fec3ce8..6a2f12e 100644
--- a/lib/asan/lit_tests/global-overflow.cc
+++ b/lib/asan/lit_tests/global-overflow.cc
@@ -17,7 +17,7 @@
   memset(ZZZ, 0, 10);
   int res = YYY[argc * 10];  // BOOOM
   // CHECK: {{READ of size 1 at 0x.* thread T0}}
-  // CHECK: {{    #0 0x.* in _?main .*global-overflow.cc:18}}
+  // CHECK: {{    #0 0x.* in _?main .*global-overflow.cc:}}[[@LINE-2]]
   // CHECK: {{0x.* is located 0 bytes to the right of global variable}}
   // CHECK:   {{.*YYY.* of size 10}}
   res += XXX[argc] + ZZZ[argc];
diff --git a/lib/asan/lit_tests/heap-overflow.cc b/lib/asan/lit_tests/heap-overflow.cc
index 2cd6d03..f1d719c 100644
--- a/lib/asan/lit_tests/heap-overflow.cc
+++ b/lib/asan/lit_tests/heap-overflow.cc
@@ -22,17 +22,15 @@
   memset(x, 0, 10);
   int res = x[argc * 10];  // BOOOM
   // CHECK: {{READ of size 1 at 0x.* thread T0}}
-  // CHECK: {{    #0 0x.* in _?main .*heap-overflow.cc:23}}
+  // CHECK: {{    #0 0x.* in _?main .*heap-overflow.cc:}}[[@LINE-2]]
   // CHECK: {{0x.* is located 0 bytes to the right of 10-byte region}}
   // CHECK: {{allocated by thread T0 here:}}
 
   // CHECK-Linux: {{    #0 0x.* in .*malloc}}
   // CHECK-Linux: {{    #1 0x.* in main .*heap-overflow.cc:21}}
 
-  // CHECK-Darwin: {{    #0 0x.* in .*mz_malloc.*}}
-  // CHECK-Darwin: {{    #1 0x.* in malloc_zone_malloc.*}}
-  // CHECK-Darwin: {{    #2 0x.* in malloc.*}}
-  // CHECK-Darwin: {{    #3 0x.* in _?main .*heap-overflow.cc:21}}
+  // CHECK-Darwin: {{    #0 0x.* in _?wrap_malloc.*}}
+  // CHECK-Darwin: {{    #1 0x.* in _?main .*heap-overflow.cc:21}}
   free(x);
   return res;
 }
diff --git a/lib/asan/lit_tests/interface_test.cc b/lib/asan/lit_tests/interface_test.cc
new file mode 100644
index 0000000..428a109
--- /dev/null
+++ b/lib/asan/lit_tests/interface_test.cc
@@ -0,0 +1,8 @@
+// Check that a user may include the ASan interface header.
+// RUN: %clang -fsanitize=address -I %p/../../../include %s -o %t && %t
+// RUN: %clang -I %p/../../../include %s -o %t && %t
+#include <sanitizer/asan_interface.h>
+
+int main() {
+  return 0;
+}
diff --git a/lib/asan/lit_tests/large_func_test.cc b/lib/asan/lit_tests/large_func_test.cc
index f67b150..ceecc29 100644
--- a/lib/asan/lit_tests/large_func_test.cc
+++ b/lib/asan/lit_tests/large_func_test.cc
@@ -29,7 +29,14 @@
   x[8]++;
   x[9]++;
 
-  x[zero + 111]++;  // we should report this exact line
+  // CHECK: {{.*ERROR: AddressSanitizer: heap-buffer-overflow on address}}
+  // CHECK:   {{0x.* at pc 0x.* bp 0x.* sp 0x.*}}
+  // CHECK: {{READ of size 4 at 0x.* thread T0}}
+  x[zero + 103]++;  // we should report this exact line
+  // atos incorrectly extracts the symbol name for static functions on
+  // Darwin.
+  // CHECK-Linux:  {{#0 0x.* in LargeFunction.*large_func_test.cc:}}[[@LINE-3]]
+  // CHECK-Darwin: {{#0 0x.* in .*LargeFunction.*large_func_test.cc}}:[[@LINE-4]]
 
   x[10]++;
   x[11]++;
@@ -46,20 +53,11 @@
 int main(int argc, char **argv) {
   int *x = new int[100];
   LargeFunction(x, argc - 1);
+  // CHECK: {{    #1 0x.* in _?main .*large_func_test.cc:}}[[@LINE-1]]
+  // CHECK: {{0x.* is located 12 bytes to the right of 400-byte region}}
+  // CHECK: {{allocated by thread T0 here:}}
+  // CHECK-Linux: {{    #0 0x.* in operator new.*}}
+  // CHECK-Darwin: {{    #0 0x.* in .*_Zna.*}}
+  // CHECK: {{    #1 0x.* in _?main .*large_func_test.cc:}}[[@LINE-7]]
   delete x;
 }
-
-// CHECK: {{.*ERROR: AddressSanitizer: heap-buffer-overflow on address}}
-// CHECK:   {{0x.* at pc 0x.* bp 0x.* sp 0x.*}}
-// CHECK: {{READ of size 4 at 0x.* thread T0}}
-
-// atos incorrectly extracts the symbol name for the static functions on
-// Darwin.
-// CHECK-Linux:  {{    #0 0x.* in LargeFunction.*large_func_test.cc:32}}
-// CHECK-Darwin: {{    #0 0x.* in .*LargeFunction.*large_func_test.cc:32}}
-
-// CHECK: {{    #1 0x.* in _?main .*large_func_test.cc:48}}
-// CHECK: {{0x.* is located 44 bytes to the right of 400-byte region}}
-// CHECK: {{allocated by thread T0 here:}}
-// CHECK: {{    #0 0x.* in operator new.*}}
-// CHECK: {{    #1 0x.* in _?main .*large_func_test.cc:47}}
diff --git a/lib/asan/lit_tests/log_path_fork_test.cc.disabled b/lib/asan/lit_tests/log_path_fork_test.cc.disabled
new file mode 100644
index 0000000..c6c1b49
--- /dev/null
+++ b/lib/asan/lit_tests/log_path_fork_test.cc.disabled
@@ -0,0 +1,22 @@
+// RUN: %clangxx_asan  %s -o %t
+// RUN: rm -f %t.log.*
+// Set verbosity to 1 so that the log files are opened prior to fork().
+// RUN: ASAN_OPTIONS="log_path=%t.log verbosity=1" not %t 2> %t.out
+// RUN: for f in %t.log.* ; do FileCheck %s < $f; done
+// RUN: [ `ls %t.log.* | wc -l` == 2 ]
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(int argc, char **argv) {
+  void *x = malloc(10);
+  free(x);
+  if (fork() == -1) return 1;
+  // There are two processes at this point, thus there should be two distinct
+  // error logs.
+  free(x);
+  return 0;
+}
+
+// CHECK: ERROR: AddressSanitizer
diff --git a/lib/asan/lit_tests/memcmp_strict_test.cc b/lib/asan/lit_tests/memcmp_strict_test.cc
new file mode 100644
index 0000000..00bf921
--- /dev/null
+++ b/lib/asan/lit_tests/memcmp_strict_test.cc
@@ -0,0 +1,16 @@
+// RUN: %clangxx_asan -m64 -O0 %s -o %t && ASAN_OPTIONS=strict_memcmp=0 %t 2>&1 | %symbolize | FileCheck %s --check-prefix=CHECK-nonstrict
+// RUN: %clangxx_asan -m64 -O0 %s -o %t && ASAN_OPTIONS=strict_memcmp=1 %t 2>&1 | %symbolize | FileCheck %s --check-prefix=CHECK-strict
+// Default to strict_memcmp=1.
+// RUN: %clangxx_asan -m64 -O0 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s --check-prefix=CHECK-strict
+
+#include <stdio.h>
+#include <string.h>
+int main() {
+  char kFoo[] = "foo";
+  char kFubar[] = "fubar";
+  int res = memcmp(kFoo, kFubar, strlen(kFubar));
+  printf("res: %d\n", res);
+  // CHECK-nonstrict: {{res: -1}}
+  // CHECK-strict: AddressSanitizer: stack-buffer-overflow
+  return 0;
+}
diff --git a/lib/asan/lit_tests/null_deref.cc b/lib/asan/lit_tests/null_deref.cc
index 64aec12..60a521d 100644
--- a/lib/asan/lit_tests/null_deref.cc
+++ b/lib/asan/lit_tests/null_deref.cc
@@ -17,18 +17,15 @@
 
 __attribute__((noinline))
 static void NullDeref(int *ptr) {
-  ptr[10]++;
+  // CHECK: ERROR: AddressSanitizer: SEGV on unknown address
+  // CHECK:   {{0x0*00028 .*pc 0x.*}}
+  // CHECK: {{AddressSanitizer can not provide additional info.}}
+  ptr[10]++;  // BOOM
+  // atos on Mac cannot extract the symbol name correctly.
+  // CHECK-Linux: {{    #0 0x.* in NullDeref.*null_deref.cc:}}[[@LINE-2]]
+  // CHECK-Darwin: {{    #0 0x.* in .*NullDeref.*null_deref.cc:}}[[@LINE-3]]
 }
 int main() {
   NullDeref((int*)0);
+  // CHECK: {{    #1 0x.* in _?main.*null_deref.cc:}}[[@LINE-1]]
 }
-
-// CHECK: ERROR: AddressSanitizer: SEGV on unknown address
-// CHECK:   {{0x0*00028 .*pc 0x.*}}
-// CHECK: {{AddressSanitizer can not provide additional info.}}
-
-// atos on Mac cannot extract the symbol name correctly.
-// CHECK-Linux: {{    #0 0x.* in NullDeref.*null_deref.cc:20}}
-// CHECK-Darwin: {{    #0 0x.* in .*NullDeref.*null_deref.cc:20}}
-
-// CHECK: {{    #1 0x.* in _?main.*null_deref.cc:23}}
diff --git a/lib/asan/lit_tests/partial_right.cc b/lib/asan/lit_tests/partial_right.cc
new file mode 100644
index 0000000..c579262
--- /dev/null
+++ b/lib/asan/lit_tests/partial_right.cc
@@ -0,0 +1,17 @@
+// RUN: %clangxx_asan -m64 -O0 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -m64 -O1 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -m64 -O2 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -m64 -O3 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -m32 -O0 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -m32 -O1 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -m32 -O2 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -m32 -O3 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
+
+#include <stdlib.h>
+int main(int argc, char **argv) {
+  volatile int *x = (int*)malloc(2*sizeof(int) + 2);
+  int res = x[2];  // BOOOM
+  // CHECK: {{READ of size 4 at 0x.* thread T0}}
+  // CHECK: [[ADDR:0x[01-9a-fa-f]+]] is located 0 bytes to the right of {{.*}}-byte region [{{.*}},{{.*}}[[ADDR]])
+  return res;
+}
diff --git a/lib/asan/lit_tests/sanity_check_pure_c.c b/lib/asan/lit_tests/sanity_check_pure_c.c
index 2b5090b..3d83065 100644
--- a/lib/asan/lit_tests/sanity_check_pure_c.c
+++ b/lib/asan/lit_tests/sanity_check_pure_c.c
@@ -13,7 +13,7 @@
   return x[5];
   // CHECK: heap-use-after-free
   // CHECK: free
-  // CHECK: main{{.*}}sanity_check_pure_c.c:12
+  // CHECK: main{{.*}}sanity_check_pure_c.c:[[@LINE-4]]
   // CHECK: malloc
-  // CHECK: main{{.*}}sanity_check_pure_c.c:11
+  // CHECK: main{{.*}}sanity_check_pure_c.c:[[@LINE-7]]
 }
diff --git a/lib/asan/lit_tests/shared-lib-test.cc b/lib/asan/lit_tests/shared-lib-test.cc
index fdda352..05bf3ec 100644
--- a/lib/asan/lit_tests/shared-lib-test.cc
+++ b/lib/asan/lit_tests/shared-lib-test.cc
@@ -49,6 +49,6 @@
   // CHECK: {{.*ERROR: AddressSanitizer: global-buffer-overflow}}
   // CHECK: {{READ of size 4 at 0x.* thread T0}}
   // CHECK: {{    #0 0x.*}}
-  // CHECK: {{    #1 0x.* in _?main .*shared-lib-test.cc:48}}
+  // CHECK: {{    #1 0x.* in _?main .*shared-lib-test.cc:}}[[@LINE-4]]
   return 0;
 }
diff --git a/lib/asan/lit_tests/stack-frame-demangle.cc b/lib/asan/lit_tests/stack-frame-demangle.cc
new file mode 100644
index 0000000..a0de4bb
--- /dev/null
+++ b/lib/asan/lit_tests/stack-frame-demangle.cc
@@ -0,0 +1,24 @@
+// Check that ASan is able to print the demangled frame name even without
+// symbolization.
+
+// RUN: %clangxx_asan -m64 -O0 %s -o %t && %t 2>&1 | FileCheck %s
+
+#include <string.h>
+
+namespace XXX {
+struct YYY {
+  static int ZZZ(int x) {
+    char array[10];
+    memset(array, 0, 10);
+    return array[x];  // BOOOM
+    // CHECK: {{ERROR: AddressSanitizer: stack-buffer-overflow}}
+    // CHECK: {{READ of size 1 at 0x.* thread T0}}
+    // CHECK: {{Address 0x.* is .* frame <XXX::YYY::ZZZ(.*)>}}
+  }
+};
+}  // namespace XXX
+
+int main(int argc, char **argv) {
+  int res = XXX::YYY::ZZZ(argc + 10);
+  return res;
+}
diff --git a/lib/asan/lit_tests/stack-overflow.cc b/lib/asan/lit_tests/stack-overflow.cc
index 0caa7df..3deb1e9 100644
--- a/lib/asan/lit_tests/stack-overflow.cc
+++ b/lib/asan/lit_tests/stack-overflow.cc
@@ -13,7 +13,7 @@
   memset(x, 0, 10);
   int res = x[argc * 10];  // BOOOM
   // CHECK: {{READ of size 1 at 0x.* thread T0}}
-  // CHECK: {{    #0 0x.* in _?main .*stack-overflow.cc:14}}
+  // CHECK: {{    #0 0x.* in _?main .*stack-overflow.cc:}}[[@LINE-2]]
   // CHECK: {{Address 0x.* is .* frame <main>}}
   return res;
 }
diff --git a/lib/asan/lit_tests/strncpy-overflow.cc b/lib/asan/lit_tests/strncpy-overflow.cc
index 9381f13..5133b5c 100644
--- a/lib/asan/lit_tests/strncpy-overflow.cc
+++ b/lib/asan/lit_tests/strncpy-overflow.cc
@@ -22,19 +22,17 @@
   strcpy(hello, "hello");
   char *short_buffer = (char*)malloc(9);
   strncpy(short_buffer, hello, 10);  // BOOM
-  // CHECK: {{WRITE of size 1 at 0x.* thread T0}}
+  // CHECK: {{WRITE of size 10 at 0x.* thread T0}}
   // CHECK-Linux: {{    #0 0x.* in .*strncpy}}
   // CHECK-Darwin: {{    #0 0x.* in _?wrap_strncpy}}
-  // CHECK: {{    #1 0x.* in _?main .*strncpy-overflow.cc:24}}
+  // CHECK: {{    #1 0x.* in _?main .*strncpy-overflow.cc:}}[[@LINE-4]]
   // CHECK: {{0x.* is located 0 bytes to the right of 9-byte region}}
   // CHECK: {{allocated by thread T0 here:}}
 
   // CHECK-Linux: {{    #0 0x.* in .*malloc}}
-  // CHECK-Linux: {{    #1 0x.* in main .*strncpy-overflow.cc:23}}
+  // CHECK-Linux: {{    #1 0x.* in main .*strncpy-overflow.cc:}}[[@LINE-10]]
 
-  // CHECK-Darwin: {{    #0 0x.* in .*mz_malloc.*}}
-  // CHECK-Darwin: {{    #1 0x.* in malloc_zone_malloc.*}}
-  // CHECK-Darwin: {{    #2 0x.* in malloc.*}}
-  // CHECK-Darwin: {{    #3 0x.* in _?main .*strncpy-overflow.cc:23}}
+  // CHECK-Darwin: {{    #0 0x.* in _?wrap_malloc.*}}
+  // CHECK-Darwin: {{    #1 0x.* in _?main .*strncpy-overflow.cc:}}[[@LINE-13]]
   return short_buffer[8];
 }
diff --git a/lib/asan/lit_tests/throw_call_test.cc b/lib/asan/lit_tests/throw_call_test.cc
new file mode 100644
index 0000000..974bc51
--- /dev/null
+++ b/lib/asan/lit_tests/throw_call_test.cc
@@ -0,0 +1,45 @@
+// RUN: %clangxx_asan %s -o %t && %t
+// http://code.google.com/p/address-sanitizer/issues/detail?id=147 (not fixed).
+// BROKEN: %clangxx_asan %s -o %t -static-libstdc++ && %t
+#include <stdio.h>
+static volatile int zero = 0;
+inline void pretend_to_do_something(void *x) {
+  __asm__ __volatile__("" : : "r" (x) : "memory");
+}
+
+__attribute__((noinline, no_sanitize_address))
+void ReallyThrow() {
+  fprintf(stderr, "ReallyThrow\n");
+  if (zero == 0)
+    throw 42;
+}
+
+__attribute__((noinline))
+void Throw() {
+  int a, b, c, d, e;
+  pretend_to_do_something(&a);
+  pretend_to_do_something(&b);
+  pretend_to_do_something(&c);
+  pretend_to_do_something(&d);
+  pretend_to_do_something(&e);
+  fprintf(stderr, "Throw stack = %p\n", &a);
+  ReallyThrow();
+}
+
+__attribute__((noinline))
+void CheckStack() {
+  int ar[100];
+  pretend_to_do_something(ar);
+  for (int i = 0; i < 100; i++)
+    ar[i] = i;
+  fprintf(stderr, "CheckStack stack = %p, %p\n", ar, ar + 100);
+}
+
+int main(int argc, char** argv) {
+  try {
+    Throw();
+  } catch(int a) {
+    fprintf(stderr, "a = %d\n", a);
+  }
+  CheckStack();
+}
diff --git a/lib/asan/lit_tests/throw_invoke_test.cc b/lib/asan/lit_tests/throw_invoke_test.cc
new file mode 100644
index 0000000..077a940
--- /dev/null
+++ b/lib/asan/lit_tests/throw_invoke_test.cc
@@ -0,0 +1,50 @@
+// RUN: %clangxx_asan %s -o %t && %t
+// RUN: %clangxx_asan %s -o %t -static-libstdc++ && %t
+#include <stdio.h>
+static volatile int zero = 0;
+inline void pretend_to_do_something(void *x) {
+  __asm__ __volatile__("" : : "r" (x) : "memory");
+}
+
+__attribute__((noinline))
+void ReallyThrow() {
+  fprintf(stderr, "ReallyThrow\n");
+  try {
+    if (zero == 0)
+      throw 42;
+    else if (zero == 1)
+      throw 1.;
+  } catch(double x) {
+  }
+}
+
+__attribute__((noinline))
+void Throw() {
+  int a, b, c, d, e;
+  pretend_to_do_something(&a);
+  pretend_to_do_something(&b);
+  pretend_to_do_something(&c);
+  pretend_to_do_something(&d);
+  pretend_to_do_something(&e);
+  fprintf(stderr, "Throw stack = %p\n", &a);
+  ReallyThrow();
+}
+
+__attribute__((noinline))
+void CheckStack() {
+  int ar[100];
+  pretend_to_do_something(ar);
+  for (int i = 0; i < 100; i++)
+    ar[i] = i;
+  fprintf(stderr, "CheckStack stack = %p, %p\n", ar, ar + 100);
+}
+
+int main(int argc, char** argv) {
+  try {
+    Throw();
+  } catch(int a) {
+    fprintf(stderr, "a = %d\n", a);
+  }
+  CheckStack();
+}
+
diff --git a/lib/asan/lit_tests/use-after-free-right.cc b/lib/asan/lit_tests/use-after-free-right.cc
new file mode 100644
index 0000000..b0de07b
--- /dev/null
+++ b/lib/asan/lit_tests/use-after-free-right.cc
@@ -0,0 +1,46 @@
+// RUN: %clangxx_asan -m64 -O0 %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-%os < %t.out
+// RUN: %clangxx_asan -m64 -O1 %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-%os < %t.out
+// RUN: %clangxx_asan -m64 -O2 %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-%os < %t.out
+// RUN: %clangxx_asan -m64 -O3 %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-%os < %t.out
+// RUN: %clangxx_asan -m32 -O0 %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-%os < %t.out
+// RUN: %clangxx_asan -m32 -O1 %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-%os < %t.out
+// RUN: %clangxx_asan -m32 -O2 %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-%os < %t.out
+// RUN: %clangxx_asan -m32 -O3 %s -o %t && %t 2>&1 | %symbolize > %t.out
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-%os < %t.out
+
+// Test the use-after-free report when the access is at the right border of
+// the allocation.
+
+#include <stdlib.h>
+int main() {
+  volatile char *x = (char*)malloc(sizeof(char));
+  free((void*)x);
+  *x = 42;
+  // CHECK: {{.*ERROR: AddressSanitizer: heap-use-after-free on address}}
+  // CHECK:   {{0x.* at pc 0x.* bp 0x.* sp 0x.*}}
+  // CHECK: {{WRITE of size 1 at 0x.* thread T0}}
+  // CHECK: {{    #0 0x.* in _?main .*use-after-free-right.cc:25}}
+  // CHECK: {{0x.* is located 0 bytes inside of 1-byte region .0x.*,0x.*}}
+  // CHECK: {{freed by thread T0 here:}}
+
+  // CHECK-Linux: {{    #0 0x.* in .*free}}
+  // CHECK-Linux: {{    #1 0x.* in main .*use-after-free-right.cc:24}}
+
+  // CHECK-Darwin: {{    #0 0x.* in _?wrap_free}}
+  // CHECK-Darwin: {{    #1 0x.* in _?main .*use-after-free-right.cc:24}}
+
+  // CHECK: {{previously allocated by thread T0 here:}}
+
+  // CHECK-Linux: {{    #0 0x.* in .*malloc}}
+  // CHECK-Linux: {{    #1 0x.* in main .*use-after-free-right.cc:23}}
+
+  // CHECK-Darwin: {{    #0 0x.* in _?wrap_malloc.*}}
+  // CHECK-Darwin: {{    #1 0x.* in _?main .*use-after-free-right.cc:23}}
+}
diff --git a/lib/asan/lit_tests/use-after-free.cc b/lib/asan/lit_tests/use-after-free.cc
index 24d5a2a..aee185d 100644
--- a/lib/asan/lit_tests/use-after-free.cc
+++ b/lib/asan/lit_tests/use-after-free.cc
@@ -30,19 +30,14 @@
   // CHECK-Linux: {{    #0 0x.* in .*free}}
   // CHECK-Linux: {{    #1 0x.* in main .*use-after-free.cc:21}}
 
-  // CHECK-Darwin: {{    #0 0x.* in .*free_common.*}}
-  // CHECK-Darwin: {{    #1 0x.* in .*mz_free.*}}
-  // We override free() on Darwin, thus no malloc_zone_free
-  // CHECK-Darwin: {{    #2 0x.* in _?wrap_free}}
-  // CHECK-Darwin: {{    #3 0x.* in _?main .*use-after-free.cc:21}}
+  // CHECK-Darwin: {{    #0 0x.* in _?wrap_free}}
+  // CHECK-Darwin: {{    #1 0x.* in _?main .*use-after-free.cc:21}}
 
   // CHECK: {{previously allocated by thread T0 here:}}
 
   // CHECK-Linux: {{    #0 0x.* in .*malloc}}
   // CHECK-Linux: {{    #1 0x.* in main .*use-after-free.cc:20}}
 
-  // CHECK-Darwin: {{    #0 0x.* in .*mz_malloc.*}}
-  // CHECK-Darwin: {{    #1 0x.* in malloc_zone_malloc.*}}
-  // CHECK-Darwin: {{    #2 0x.* in malloc.*}}
-  // CHECK-Darwin: {{    #3 0x.* in _?main .*use-after-free.cc:20}}
+  // CHECK-Darwin: {{    #0 0x.* in _?wrap_malloc.*}}
+  // CHECK-Darwin: {{    #1 0x.* in _?main .*use-after-free.cc:20}}
 }
diff --git a/lib/asan/lit_tests/use-after-scope-inlined.cc b/lib/asan/lit_tests/use-after-scope-inlined.cc
index c192bc2..3d730de 100644
--- a/lib/asan/lit_tests/use-after-scope-inlined.cc
+++ b/lib/asan/lit_tests/use-after-scope-inlined.cc
@@ -24,6 +24,6 @@
   // CHECK:   #0 0x{{.*}} in {{_?}}main
   // CHECK:      {{.*}}use-after-scope-inlined.cc:[[@LINE-4]]
   // CHECK: Address 0x{{.*}} is located at offset
-  // CHECK:      [[OFFSET:[^ ]*]] in frame <main> of T0's stack:
+  // CHECK:      [[OFFSET:[^ ]*]] in frame <main> of T0{{.*}}:
   // CHECK:   {{\[}}[[OFFSET]], {{.*}}) 'x.i'
 }
diff --git a/lib/asan/scripts/asan_symbolize.py b/lib/asan/scripts/asan_symbolize.py
index 7b30bb5..bd3bf1e 100755
--- a/lib/asan/scripts/asan_symbolize.py
+++ b/lib/asan/scripts/asan_symbolize.py
@@ -8,6 +8,7 @@
 #
 #===------------------------------------------------------------------------===#
 import bisect
+import getopt
 import os
 import re
 import subprocess
@@ -18,6 +19,7 @@
 filetypes = {}
 vmaddrs = {}
 DEBUG = False
+demangle = False;
 
 
 # FIXME: merge the code that calls fix_filename().
@@ -60,7 +62,7 @@
       return None
     cmd = [self.symbolizer_path,
            '--use-symbol-table=true',
-           '--demangle=false',
+           '--demangle=%s' % demangle,
            '--functions=true',
            '--inlining=true']
     if DEBUG:
@@ -111,7 +113,10 @@
     self.pipe = self.open_addr2line()
 
   def open_addr2line(self):
-    cmd = ['addr2line', '-f', '-e', self.binary]
+    cmd = ['addr2line', '-f']
+    if demangle:
+      cmd += ['--demangle']
+    cmd += ['-e', self.binary]
     if DEBUG:
       print ' '.join(cmd)
     return subprocess.Popen(cmd,
@@ -352,5 +357,9 @@
 
 
 if __name__ == '__main__':
+  opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
+  for o, a in opts:
+    if o in ("-d", "--demangle"):
+      demangle = True;
   loop = SymbolizationLoop()
   loop.process_stdin()
diff --git a/lib/asan/tests/CMakeLists.txt b/lib/asan/tests/CMakeLists.txt
index 44f188c..ca18084 100644
--- a/lib/asan/tests/CMakeLists.txt
+++ b/lib/asan/tests/CMakeLists.txt
@@ -10,168 +10,197 @@
 # instrumentation against the just-built runtime library.
 
 include(CheckCXXCompilerFlag)
+include(CompilerRTCompile)
 
 include_directories(..)
 include_directories(../..)
 
+# Use zero-based shadow on Android.
+if(ANDROID)
+  set(ASAN_TESTS_USE_ZERO_BASE_SHADOW TRUE)
+else()
+  set(ASAN_TESTS_USE_ZERO_BASE_SHADOW FALSE)
+endif()
+
+set(ASAN_UNITTEST_HEADERS
+  asan_mac_test.h
+  asan_test_config.h
+  asan_test_utils.h)
+
 set(ASAN_UNITTEST_COMMON_CFLAGS
+  ${COMPILER_RT_GTEST_INCLUDE_CFLAGS}
+  -I${COMPILER_RT_SOURCE_DIR}/include
+  -I${COMPILER_RT_SOURCE_DIR}/lib
+  -I${COMPILER_RT_SOURCE_DIR}/lib/asan
+  -I${COMPILER_RT_SOURCE_DIR}/lib/sanitizer_common/tests
   -Wall
   -Wno-format
   -Werror
-  -fvisibility=hidden
   -g
   -O2
 )
 
+if(ASAN_TESTS_USE_ZERO_BASE_SHADOW)
+  list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -fPIE)
+endif()
 if(SUPPORTS_NO_VARIADIC_MACROS_FLAG)
   list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -Wno-variadic-macros)
 endif()
 
 # Use -D instead of definitions to please custom compile command.
+list(APPEND ASAN_UNITTEST_COMMON_CFLAGS
+  -DASAN_HAS_BLACKLIST=1
+  -DASAN_HAS_EXCEPTIONS=1
+  -DASAN_UAR=0)
 if(ANDROID)
   list(APPEND ASAN_UNITTEST_COMMON_CFLAGS
+    -DASAN_FLEXIBLE_MAPPING_AND_OFFSET=0
     -DASAN_LOW_MEMORY=1
-    -DASAN_HAS_BLACKLIST=1
-    -DASAN_HAS_EXCEPTIONS=1
-    -DASAN_NEEDS_SEGV=0
-    -DASAN_UAR=0
-    -fPIE
-  )
+    -DASAN_NEEDS_SEGV=0)
 else()
   list(APPEND ASAN_UNITTEST_COMMON_CFLAGS
-    -DASAN_HAS_BLACKLIST=1
-    -DASAN_HAS_EXCEPTIONS=1
-    -DASAN_NEEDS_SEGV=1
-    -DASAN_UAR=0
-  )
+    -DASAN_FLEXIBLE_MAPPING_AND_OFFSET=1
+    -DASAN_LOW_MEMORY=0
+    -DASAN_NEEDS_SEGV=1)
 endif()
 
-# Support 64-bit and 32-bit builds.
-if(LLVM_BUILD_32_BITS)
-  list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -m32)
-else()
-  list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -m64)
+set(ASAN_LINK_FLAGS)
+if(ASAN_TESTS_USE_ZERO_BASE_SHADOW)
+  list(APPEND ASAN_LINK_FLAGS -pie)
 endif()
-
-set(ASAN_GTEST_INCLUDE_CFLAGS
-  -I${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include
-  -I${LLVM_MAIN_SRC_DIR}/include
-  -I${LLVM_BINARY_DIR}/include
-  -D__STDC_CONSTANT_MACROS
-  -D__STDC_LIMIT_MACROS
-)
+# On Android, we link with ASan runtime manually. On other platforms we depend
+# on Clang driver behavior, passing -fsanitize=address flag.
+if(NOT ANDROID)
+  list(APPEND ASAN_LINK_FLAGS -fsanitize=address)
+endif()
+# Unit tests on Mac depend on Foundation.
+if(APPLE)
+  list(APPEND ASAN_LINK_FLAGS -framework Foundation)
+endif()
+# Unit tests require libstdc++.
+list(APPEND ASAN_LINK_FLAGS -lstdc++)
 
 set(ASAN_BLACKLIST_FILE "${CMAKE_CURRENT_SOURCE_DIR}/asan_test.ignore")
 
 set(ASAN_UNITTEST_INSTRUMENTED_CFLAGS
   ${ASAN_UNITTEST_COMMON_CFLAGS}
-  ${ASAN_GTEST_INCLUDE_CFLAGS}
   -fsanitize=address
-  -mllvm "-asan-blacklist=${ASAN_BLACKLIST_FILE}"
+  "-fsanitize-blacklist=${ASAN_BLACKLIST_FILE}"
   -mllvm -asan-stack=1
   -mllvm -asan-globals=1
   -mllvm -asan-mapping-scale=0        # default will be used
   -mllvm -asan-mapping-offset-log=-1  # default will be used
   -mllvm -asan-use-after-return=0
 )
-
-function(add_asan_test testsuite testname)
-  add_unittest(${testsuite} ${testname} ${ARGN})
-  if (APPLE)
-    # Darwin-specific linker flags.
-    set_property(TARGET ${testname} APPEND PROPERTY
-                 LINK_FLAGS "-framework Foundation")
-    target_link_libraries(${testname} clang_rt.asan_osx)
-  elseif (ANDROID)
-    target_link_libraries(${testname} clang_rt.asan-arm-android)
-  elseif (UNIX)
-    # Linux-specific linker flags.
-    set_property(TARGET ${testname} APPEND PROPERTY
-                 LINK_FLAGS "-lpthread -ldl -rdynamic")
-    if(LLVM_BUILD_32_BITS)
-      target_link_libraries(${testname} clang_rt.asan-i386)
-    else()
-      target_link_libraries(${testname} clang_rt.asan-x86_64)
-    endif()
-  endif()
-  set(add_compile_flags "")
-  get_property(compile_flags TARGET ${testname} PROPERTY COMPILE_FLAGS)
-  foreach(arg ${ASAN_UNITTEST_COMMON_CFLAGS})
-    set(add_compile_flags "${add_compile_flags} ${arg}")
-  endforeach(arg ${ASAN_UNITTEST_COMMON_CFLAGS})
-  set_property(TARGET ${testname} PROPERTY COMPILE_FLAGS
-               "${compile_flags} ${add_compile_flags}")
-endfunction()
-
-set(ASAN_NOINST_TEST_SOURCES
-  asan_noinst_test.cc
-  asan_test_main.cc
-)
-
-set(ASAN_INST_TEST_OBJECTS)
-
-# We only support building instrumented tests when we're not cross compiling
-# and targeting a unix-like system where we can predict viable compilation and
-# linking strategies.
-# We use a different approach to build these tests for Android. See below.
-if("${CMAKE_HOST_SYSTEM}" STREQUAL "${CMAKE_SYSTEM}" AND UNIX AND NOT ANDROID)
-
-  # This function is a custom routine to manage manually compiling source files
-  # for unit tests with the just-built Clang binary, using the ASan
-  # instrumentation, and linking them into a test executable.
-  function(add_asan_compile_command source extra_cflags)
-    set(output_obj "${source}.asan.o")
-    add_custom_command(
-      OUTPUT ${output_obj}
-      COMMAND clang
-              ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS}
-              ${extra_cflags}
-              -c -o "${output_obj}"
-              ${CMAKE_CURRENT_SOURCE_DIR}/${source}
-      MAIN_DEPENDENCY ${source}
-      DEPENDS clang ${ASAN_RUNTIME_LIBRARIES} ${ASAN_BLACKLIST_FILE} ${ARGN}
-      )
-  endfunction()
-
-  add_asan_compile_command(asan_globals_test.cc "")
-  add_asan_compile_command(asan_test.cc "")
-  list(APPEND ASAN_INST_TEST_OBJECTS asan_globals_test.cc.asan.o
-                                     asan_test.cc.asan.o)
-  if (APPLE)
-    add_asan_compile_command(asan_mac_test.mm "-ObjC")
-    list(APPEND ASAN_INST_TEST_OBJECTS asan_mac_test.mm.asan.o)
-  endif()
-
-  # Build benchmarks test instrumented with AddressSanitizer.
-  add_asan_compile_command(asan_benchmarks_test.cc "")
-  add_custom_target(AsanBenchmarks)
-  set_target_properties(AsanBenchmarks PROPERTIES FOLDER "Asan benchmarks")
-  add_asan_test(AsanBenchmarks AsanBenchmark asan_benchmarks_test.cc.asan.o)
+if(ASAN_TESTS_USE_ZERO_BASE_SHADOW)
+  list(APPEND ASAN_UNITTEST_INSTRUMENTED_CFLAGS
+    -fsanitize-address-zero-base-shadow)
 endif()
 
+# Compile source for the given architecture, using compiler
+# options in ${ARGN}, and add it to the object list.
+macro(asan_compile obj_list source arch)
+  get_filename_component(basename ${source} NAME)
+  set(output_obj "${basename}.${arch}.o")
+  get_target_flags_for_arch(${arch} TARGET_CFLAGS)
+  clang_compile(${output_obj} ${source}
+                CFLAGS ${ARGN} ${TARGET_CFLAGS}
+                DEPS gtest ${ASAN_RUNTIME_LIBRARIES}
+                           ${ASAN_UNITTEST_HEADERS}
+                           ${ASAN_BLACKLIST_FILE})
+  list(APPEND ${obj_list} ${output_obj})
+endmacro()
+
+# Link ASan unit test for a given architecture from a set
+# of objects in ${ARGN}.
+macro(add_asan_test test_suite test_name arch)
+  get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
+  add_compiler_rt_test(${test_suite} ${test_name}
+                       OBJECTS ${ARGN}
+                       DEPS ${ASAN_RUNTIME_LIBRARIES} ${ARGN}
+                       LINK_FLAGS ${ASAN_LINK_FLAGS}
+                                  ${TARGET_LINK_FLAGS})
+endmacro()
+
 # Main AddressSanitizer unit tests.
 add_custom_target(AsanUnitTests)
 set_target_properties(AsanUnitTests PROPERTIES FOLDER "ASan unit tests")
+# ASan benchmarks (not actively used now).
+add_custom_target(AsanBenchmarks)
+set_target_properties(AsanBenchmarks PROPERTIES FOLDER "Asan benchmarks")
+
+set(ASAN_NOINST_TEST_SOURCES
+  asan_noinst_test.cc
+  asan_test_main.cc)
+set(ASAN_INST_TEST_SOURCES
+  asan_globals_test.cc
+  asan_test.cc
+  asan_oob_test.cc
+  asan_mem_test.cc
+  asan_str_test.cc)
+
+# Adds ASan unit tests and benchmarks for the given architecture.
+macro(add_asan_tests_for_arch arch)
+  # Build gtest instrumented with ASan.
+  set(ASAN_INST_GTEST)
+  asan_compile(ASAN_INST_GTEST ${COMPILER_RT_GTEST_SOURCE} ${arch} 
+                               ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS})
+  # Instrumented tests.
+  set(ASAN_INST_TEST_OBJECTS)
+  foreach(src ${ASAN_INST_TEST_SOURCES})
+    asan_compile(ASAN_INST_TEST_OBJECTS ${src} ${arch}
+                 ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS})
+  endforeach()
+  # Add Mac-specific tests.
+  if (APPLE)
+    asan_compile(ASAN_INST_TEST_OBJECTS asan_mac_test.cc ${arch}
+                 ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS})
+    asan_compile(ASAN_INST_TEST_OBJECTS asan_mac_test_helpers.mm ${arch}
+                 ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} -ObjC)
+  endif()
+  # Uninstrumented tests.
+  set(ASAN_NOINST_TEST_OBJECTS)
+  foreach(src ${ASAN_NOINST_TEST_SOURCES})
+    asan_compile(ASAN_NOINST_TEST_OBJECTS ${src} ${arch}
+                 ${ASAN_UNITTEST_COMMON_CFLAGS})
+  endforeach()
+  # Link everything together.
+  add_asan_test(AsanUnitTests "Asan-${arch}-Test" ${arch}
+                ${ASAN_NOINST_TEST_OBJECTS}
+                ${ASAN_INST_TEST_OBJECTS} ${ASAN_INST_GTEST})
+
+  # Instrumented benchmarks.
+  set(ASAN_BENCHMARKS_OBJECTS)
+  asan_compile(ASAN_BENCHMARKS_OBJECTS asan_benchmarks_test.cc ${arch}
+               ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS})
+  # Link benchmarks.
+  add_asan_test(AsanBenchmarks "Asan-${arch}-Benchmark" ${arch}
+                ${ASAN_BENCHMARKS_OBJECTS} ${ASAN_INST_GTEST})
+endmacro()
+
+if(COMPILER_RT_CAN_EXECUTE_TESTS)
+  foreach(arch ${ASAN_SUPPORTED_ARCH})
+    add_asan_tests_for_arch(${arch})
+  endforeach()
+endif()
 
 if(ANDROID)
-  set(ASAN_INST_TEST_SOURCES asan_globals_test.cc asan_test.cc)
-  add_library(asan_noinst_test OBJECT
-    ${ASAN_NOINST_TEST_SOURCES}
-    )
-  set_target_compile_flags(asan_noinst_test
-    ${ASAN_UNITTEST_COMMON_CFLAGS} ${ASAN_GTEST_INCLUDE_CFLAGS}
-    )
-  add_asan_test(AsanUnitTests AsanTest
-    ${ASAN_INST_TEST_SOURCES}
+  # We assume that unit tests on Android are built in a build
+  # tree with a fresh Clang as the host compiler.
+  add_library(asan_noinst_test OBJECT ${ASAN_NOINST_TEST_SOURCES})
+  set_target_compile_flags(asan_noinst_test ${ASAN_UNITTEST_COMMON_CFLAGS})
+  add_library(asan_inst_test OBJECT
+              ${ASAN_INST_TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE})  
+  set_target_compile_flags(asan_inst_test ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS})
+  add_executable(AsanTest
     $<TARGET_OBJECTS:asan_noinst_test>
-    )
-  set_target_compile_flags(AsanTest
-    ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} ${ASAN_GTEST_INCLUDE_CFLAGS}
-    )
-  set_target_link_flags(AsanTest
-    -pie
-    )
-else()
-  add_asan_test(AsanUnitTests AsanTest ${ASAN_NOINST_TEST_SOURCES}
-    ${ASAN_INST_TEST_OBJECTS})
+    $<TARGET_OBJECTS:asan_inst_test>
+  )
+  # Setup correct output directory and link flags.
+  set_target_properties(AsanTest PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+  set_target_link_flags(AsanTest ${ASAN_LINK_FLAGS})
+  target_link_libraries(AsanTest clang_rt.asan-arm-android)
+  # Add unit test to test suite.
+  add_dependencies(AsanUnitTests AsanTest)
 endif()
diff --git a/lib/asan/tests/asan_globals_test.cc b/lib/asan/tests/asan_globals_test.cc
index dc2e9bb..5042ef0 100644
--- a/lib/asan/tests/asan_globals_test.cc
+++ b/lib/asan/tests/asan_globals_test.cc
@@ -11,8 +11,29 @@
 //
 // Some globals in a separate file.
 //===----------------------------------------------------------------------===//
+#include "asan_test_utils.h"
 
-extern char glob5[5];
+char glob1[1];
+char glob2[2];
+char glob3[3];
+char glob4[4];
+char glob5[5];
+char glob6[6];
+char glob7[7];
+char glob8[8];
+char glob9[9];
+char glob10[10];
+char glob11[11];
+char glob12[12];
+char glob13[13];
+char glob14[14];
+char glob15[15];
+char glob16[16];
+char glob17[17];
+char glob1000[1000];
+char glob10000[10000];
+char glob100000[100000];
+
 static char static10[10];
 
 int GlobalsTest(int zero) {
diff --git a/lib/asan/tests/asan_mac_test.cc b/lib/asan/tests/asan_mac_test.cc
new file mode 100644
index 0000000..cabdfd7
--- /dev/null
+++ b/lib/asan/tests/asan_mac_test.cc
@@ -0,0 +1,236 @@
+//===-- asan_mac_test.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+
+#include "asan_test_utils.h"
+
+#include "asan_mac_test.h"
+
+#include <malloc/malloc.h>
+#include <AvailabilityMacros.h>  // For MAC_OS_X_VERSION_*
+#include <CoreFoundation/CFString.h>
+
+TEST(AddressSanitizerMac, CFAllocatorDefaultDoubleFree) {
+  EXPECT_DEATH(
+      CFAllocatorDefaultDoubleFree(NULL),
+      "attempting double-free");
+}
+
+void CFAllocator_DoubleFreeOnPthread() {
+  pthread_t child;
+  PTHREAD_CREATE(&child, NULL, CFAllocatorDefaultDoubleFree, NULL);
+  PTHREAD_JOIN(child, NULL);  // Shouldn't be reached.
+}
+
+TEST(AddressSanitizerMac, CFAllocatorDefaultDoubleFree_ChildPhread) {
+  EXPECT_DEATH(CFAllocator_DoubleFreeOnPthread(), "attempting double-free");
+}
+
+namespace {
+
+void *GLOB;
+
+void *CFAllocatorAllocateToGlob(void *unused) {
+  GLOB = CFAllocatorAllocate(NULL, 100, /*hint*/0);
+  return NULL;
+}
+
+void *CFAllocatorDeallocateFromGlob(void *unused) {
+  char *p = (char*)GLOB;
+  p[100] = 'A';  // ASan should report an error here.
+  CFAllocatorDeallocate(NULL, GLOB);
+  return NULL;
+}
+
+void CFAllocator_PassMemoryToAnotherThread() {
+  pthread_t th1, th2;
+  PTHREAD_CREATE(&th1, NULL, CFAllocatorAllocateToGlob, NULL);
+  PTHREAD_JOIN(th1, NULL);
+  PTHREAD_CREATE(&th2, NULL, CFAllocatorDeallocateFromGlob, NULL);
+  PTHREAD_JOIN(th2, NULL);
+}
+
+TEST(AddressSanitizerMac, CFAllocator_PassMemoryToAnotherThread) {
+  EXPECT_DEATH(CFAllocator_PassMemoryToAnotherThread(),
+               "heap-buffer-overflow");
+}
+
+}  // namespace
+
+// TODO(glider): figure out whether we still need these tests. Is it correct
+// to intercept the non-default CFAllocators?
+TEST(AddressSanitizerMac, DISABLED_CFAllocatorSystemDefaultDoubleFree) {
+  EXPECT_DEATH(
+      CFAllocatorSystemDefaultDoubleFree(),
+      "attempting double-free");
+}
+
+// We're intercepting malloc, so kCFAllocatorMalloc is routed to ASan.
+TEST(AddressSanitizerMac, CFAllocatorMallocDoubleFree) {
+  EXPECT_DEATH(CFAllocatorMallocDoubleFree(), "attempting double-free");
+}
+
+TEST(AddressSanitizerMac, DISABLED_CFAllocatorMallocZoneDoubleFree) {
+  EXPECT_DEATH(CFAllocatorMallocZoneDoubleFree(), "attempting double-free");
+}
+
+// For libdispatch tests below we check that ASan got to the shadow byte
+// legend, i.e. managed to print the thread stacks (this almost certainly
+// means that the libdispatch task creation has been intercepted correctly).
+TEST(AddressSanitizerMac, GCDDispatchAsync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDDispatchAsync(), "Shadow byte legend");
+}
+
+TEST(AddressSanitizerMac, GCDDispatchSync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDDispatchSync(), "Shadow byte legend");
+}
+
+
+TEST(AddressSanitizerMac, GCDReuseWqthreadsAsync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDReuseWqthreadsAsync(), "Shadow byte legend");
+}
+
+TEST(AddressSanitizerMac, GCDReuseWqthreadsSync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDReuseWqthreadsSync(), "Shadow byte legend");
+}
+
+TEST(AddressSanitizerMac, GCDDispatchAfter) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDDispatchAfter(), "Shadow byte legend");
+}
+
+TEST(AddressSanitizerMac, GCDSourceEvent) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDSourceEvent(), "Shadow byte legend");
+}
+
+TEST(AddressSanitizerMac, GCDSourceCancel) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDSourceCancel(), "Shadow byte legend");
+}
+
+TEST(AddressSanitizerMac, GCDGroupAsync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDGroupAsync(), "Shadow byte legend");
+}
+
+void *MallocIntrospectionLockWorker(void *_) {
+  const int kNumPointers = 100;
+  int i;
+  void *pointers[kNumPointers];
+  for (i = 0; i < kNumPointers; i++) {
+    pointers[i] = malloc(i + 1);
+  }
+  for (i = 0; i < kNumPointers; i++) {
+    free(pointers[i]);
+  }
+
+  return NULL;
+}
+
+void *MallocIntrospectionLockForker(void *_) {
+  pid_t result = fork();
+  if (result == -1) {
+    perror("fork");
+  }
+  assert(result != -1);
+  if (result == 0) {
+    // Call malloc in the child process to make sure we won't deadlock.
+    void *ptr = malloc(42);
+    free(ptr);
+    exit(0);
+  } else {
+    // Return in the parent process.
+    return NULL;
+  }
+}
+
+TEST(AddressSanitizerMac, MallocIntrospectionLock) {
+  // Incorrect implementation of force_lock and force_unlock in our malloc zone
+  // will cause forked processes to deadlock.
+  // TODO(glider): need to detect that none of the child processes deadlocked.
+  const int kNumWorkers = 5, kNumIterations = 100;
+  int i, iter;
+  for (iter = 0; iter < kNumIterations; iter++) {
+    pthread_t workers[kNumWorkers], forker;
+    for (i = 0; i < kNumWorkers; i++) {
+      PTHREAD_CREATE(&workers[i], 0, MallocIntrospectionLockWorker, 0);
+    }
+    PTHREAD_CREATE(&forker, 0, MallocIntrospectionLockForker, 0);
+    for (i = 0; i < kNumWorkers; i++) {
+      PTHREAD_JOIN(workers[i], 0);
+    }
+    PTHREAD_JOIN(forker, 0);
+  }
+}
+
+void *TSDAllocWorker(void *test_key) {
+  if (test_key) {
+    void *mem = malloc(10);
+    pthread_setspecific(*(pthread_key_t*)test_key, mem);
+  }
+  return NULL;
+}
+
+TEST(AddressSanitizerMac, DISABLED_TSDWorkqueueTest) {
+  pthread_t th;
+  pthread_key_t test_key;
+  pthread_key_create(&test_key, CallFreeOnWorkqueue);
+  PTHREAD_CREATE(&th, NULL, TSDAllocWorker, &test_key);
+  PTHREAD_JOIN(th, NULL);
+  pthread_key_delete(test_key);
+}
+
+// Test that CFStringCreateCopy does not copy constant strings.
+TEST(AddressSanitizerMac, CFStringCreateCopy) {
+  CFStringRef str = CFSTR("Hello world!\n");
+  CFStringRef str2 = CFStringCreateCopy(0, str);
+  EXPECT_EQ(str, str2);
+}
+
+TEST(AddressSanitizerMac, NSObjectOOB) {
+  // Make sure that our allocators are used for NSObjects.
+  EXPECT_DEATH(TestOOBNSObjects(), "heap-buffer-overflow");
+}
+
+// Make sure that correct pointer is passed to free() when deallocating a
+// NSURL object.
+// See http://code.google.com/p/address-sanitizer/issues/detail?id=70.
+TEST(AddressSanitizerMac, NSURLDeallocation) {
+  TestNSURLDeallocation();
+}
+
+// See http://code.google.com/p/address-sanitizer/issues/detail?id=109.
+TEST(AddressSanitizerMac, Mstats) {
+  malloc_statistics_t stats1, stats2;
+  malloc_zone_statistics(/*all zones*/NULL, &stats1);
+  const size_t kMallocSize = 100000;
+  void *alloc = Ident(malloc(kMallocSize));
+  malloc_zone_statistics(/*all zones*/NULL, &stats2);
+  EXPECT_GT(stats2.blocks_in_use, stats1.blocks_in_use);
+  EXPECT_GE(stats2.size_in_use - stats1.size_in_use, kMallocSize);
+  free(alloc);
+  // Even the default OSX allocator may not change the stats after free().
+}
+
diff --git a/lib/asan/tests/asan_mac_test.mm b/lib/asan/tests/asan_mac_test_helpers.mm
similarity index 95%
rename from lib/asan/tests/asan_mac_test.mm
rename to lib/asan/tests/asan_mac_test_helpers.mm
index 4e5873b..4cbd2bb 100644
--- a/lib/asan/tests/asan_mac_test.mm
+++ b/lib/asan/tests/asan_mac_test_helpers.mm
@@ -57,7 +57,7 @@
 @implementation LoadSomething
 
 +(void) load {
-  for (int i = 0; i < strlen(kStartupStr); i++) {
+  for (size_t i = 0; i < strlen(kStartupStr); i++) {
     access_memory(&kStartupStr[i]);  // make sure no optimizations occur.
   }
   // Don't print anything here not to interfere with the death tests.
@@ -66,13 +66,13 @@
 @end
 
 void worker_do_alloc(int size) {
-  char * volatile mem = malloc(size);
+  char * volatile mem = (char * volatile)malloc(size);
   mem[0] = 0; // Ok
   free(mem);
 }
 
 void worker_do_crash(int size) {
-  char * volatile mem = malloc(size);
+  char * volatile mem = (char * volatile)malloc(size);
   access_memory(&mem[size]);  // BOOM
   free(mem);
 }
@@ -167,7 +167,7 @@
       dispatch_time(DISPATCH_TIME_NOW, 1LL * NSEC_PER_SEC);
 
   dispatch_source_set_timer(timer, milestone, DISPATCH_TIME_FOREVER, 0);
-  char * volatile mem = malloc(10);
+  char * volatile mem = (char * volatile)malloc(10);
   dispatch_source_set_event_handler(timer, ^{
     access_memory(&mem[10]);
   });
@@ -184,7 +184,7 @@
       dispatch_time(DISPATCH_TIME_NOW, 1LL * NSEC_PER_SEC);
 
   dispatch_source_set_timer(timer, milestone, DISPATCH_TIME_FOREVER, 0);
-  char * volatile mem = malloc(10);
+  char * volatile mem = (char * volatile)malloc(10);
   // Both dispatch_source_set_cancel_handler() and
   // dispatch_source_set_event_handler() use dispatch_barrier_async_f().
   // It's tricky to test dispatch_source_set_cancel_handler() separately,
@@ -202,7 +202,7 @@
 void TestGCDGroupAsync() {
   dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
   dispatch_group_t group = dispatch_group_create(); 
-  char * volatile mem = malloc(10);
+  char * volatile mem = (char * volatile)malloc(10);
   dispatch_group_async(group, queue, ^{
     access_memory(&mem[10]);
   });
diff --git a/lib/asan/tests/asan_mem_test.cc b/lib/asan/tests/asan_mem_test.cc
new file mode 100644
index 0000000..60f5cd4
--- /dev/null
+++ b/lib/asan/tests/asan_mem_test.cc
@@ -0,0 +1,240 @@
+//===-- asan_mem_test.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+#include "asan_test_utils.h"
+
+// Exercises memset() on a heap array of |length| T's: in-bounds and zero-size
+// calls must succeed; each out-of-bounds call must die with an OOB message.
+template<typename T>
+void MemSetOOBTestTemplate(size_t length) {
+  if (length == 0) return;
+  size_t size = Ident(sizeof(T) * length);
+  T *array = Ident((T*)malloc(size));
+  int element = Ident(42);
+  int zero = Ident(0);
+  // Call via Ident(memset), presumably to keep the compiler from inlining it.
+  void *(*MEMSET)(void *s, int c, size_t n) = Ident(memset);
+  // memset interval inside array
+  MEMSET(array, element, size);
+  MEMSET(array, element, size - 1);
+  MEMSET(array + length - 1, element, sizeof(T));
+  MEMSET(array, element, 1);
+
+  // memset 0 bytes
+  MEMSET(array - 10, element, zero);
+  MEMSET(array - 1, element, zero);
+  MEMSET(array, element, zero);
+  MEMSET(array + length, 0, zero);
+  MEMSET(array + length + 1, 0, zero);
+
+  // try to memset bytes to the right of array
+  EXPECT_DEATH(MEMSET(array, 0, size + 1), RightOOBWriteMessage(0));
+  EXPECT_DEATH(MEMSET((char*)(array + length) - 1, element, 6),
+               RightOOBWriteMessage(0));
+  EXPECT_DEATH(MEMSET(array + 1, element, size + sizeof(T)),
+               RightOOBWriteMessage(0));
+  // whole interval is to the right
+  EXPECT_DEATH(MEMSET(array + length + 1, 0, 10),
+               RightOOBWriteMessage(sizeof(T)));
+
+  // try to memset bytes to the left of array
+  EXPECT_DEATH(MEMSET((char*)array - 1, element, size), LeftOOBWriteMessage(1));
+  EXPECT_DEATH(MEMSET((char*)array - 5, 0, 6), LeftOOBWriteMessage(5));
+  if (length >= 100) {
+    // Large OOB, we find it only if the redzone is large enough.
+    EXPECT_DEATH(MEMSET(array - 5, element, size + 5 * sizeof(T)),
+                 LeftOOBWriteMessage(5 * sizeof(T)));
+  }
+  // whole interval is to the left
+  EXPECT_DEATH(MEMSET(array - 2, 0, sizeof(T)),
+               LeftOOBWriteMessage(2 * sizeof(T)));
+
+  // try to memset bytes both to the left & to the right
+  EXPECT_DEATH(MEMSET((char*)array - 2, element, size + 4),
+               LeftOOBWriteMessage(2));
+
+  free(array);
+}
+
+TEST(AddressSanitizer, MemSetOOBTest) {
+  MemSetOOBTestTemplate<char>(100);
+  MemSetOOBTestTemplate<int>(5);
+  MemSetOOBTestTemplate<double>(256);
+  // We can test arrays of structures/classes here, but what for?
+}
+
+// Try to allocate two arrays of 'size' bytes that are near each other.
+// Strictly speaking we are not guaranteed to find such two pointers,
+// but given the structure of asan's allocator we will.
+static bool AllocateTwoAdjacentArrays(char **x1, char **x2, size_t size) {
+  vector<char *> v;
+  bool res = false;
+  for (size_t i = 0; i < 1000U && !res; i++) {
+    v.push_back(new char[size]);
+    if (i == 0) continue;
+    sort(v.begin(), v.end());
+    for (size_t j = 1; j < v.size(); j++) {
+      assert(v[j] > v[j-1]);
+      if ((size_t)(v[j] - v[j-1]) < size * 2) {
+        *x2 = v[j];
+        *x1 = v[j-1];
+        res = true;
+        break;
+      }
+    }
+  }
+
+  for (size_t i = 0; i < v.size(); i++) {
+    if (res && v[i] == *x1) continue;
+    if (res && v[i] == *x2) continue;
+    delete [] v[i];
+  }
+  return res;
+}
+
+TEST(AddressSanitizer, LargeOOBInMemset) {
+  for (size_t size = 200; size < 100000; size += size / 2) {
+    char *x1, *x2;
+    if (!Ident(AllocateTwoAdjacentArrays)(&x1, &x2, size))
+      continue;
+    // fprintf(stderr, "  large oob memset: %p %p %zd\n", x1, x2, size);
+    // Do a memset on x1 with huge out-of-bound access that will end up in x2.
+    EXPECT_DEATH(Ident(memset)(x1, 0, size * 2),
+                 "is located 0 bytes to the right");
+    delete [] x1;
+    delete [] x2;
+    return;
+  }
+  assert(0 && "Did not find two adjacent malloc-ed pointers");
+}
+
+// Same test for memcpy and memmove functions
+template <typename T, class M>
+void MemTransferOOBTestTemplate(size_t length) {
+  if (length == 0) return;
+  size_t size = Ident(sizeof(T) * length);
+  T *src = Ident((T*)malloc(size));
+  T *dest = Ident((T*)malloc(size));
+  int zero = Ident(0);
+
+  // valid transfer of bytes between arrays
+  M::transfer(dest, src, size);
+  M::transfer(dest + 1, src, size - sizeof(T));
+  M::transfer(dest, src + length - 1, sizeof(T));
+  M::transfer(dest, src, 1);
+
+  // transfer zero bytes
+  M::transfer(dest - 1, src, 0);
+  M::transfer(dest + length, src, zero);
+  M::transfer(dest, src - 1, zero);
+  M::transfer(dest, src, zero);
+
+  // try to change mem to the right of dest
+  EXPECT_DEATH(M::transfer(dest + 1, src, size),
+               RightOOBWriteMessage(0));
+  EXPECT_DEATH(M::transfer((char*)(dest + length) - 1, src, 5),
+               RightOOBWriteMessage(0));
+
+  // try to change mem to the left of dest
+  EXPECT_DEATH(M::transfer(dest - 2, src, size),
+               LeftOOBWriteMessage(2 * sizeof(T)));
+  EXPECT_DEATH(M::transfer((char*)dest - 3, src, 4),
+               LeftOOBWriteMessage(3));
+
+  // try to access mem to the right of src
+  EXPECT_DEATH(M::transfer(dest, src + 2, size),
+               RightOOBReadMessage(0));
+  EXPECT_DEATH(M::transfer(dest, (char*)(src + length) - 3, 6),
+               RightOOBReadMessage(0));
+
+  // try to access mem to the left of src
+  EXPECT_DEATH(M::transfer(dest, src - 1, size),
+               LeftOOBReadMessage(sizeof(T)));
+  EXPECT_DEATH(M::transfer(dest, (char*)src - 6, 7),
+               LeftOOBReadMessage(6));
+
+  // Generally we don't need to test cases where both reading from src and
+  // writing to dest touch poisoned memory.
+
+  T *big_src = Ident((T*)malloc(size * 2));
+  T *big_dest = Ident((T*)malloc(size * 2));
+  // try to change mem to both sides of dest
+  EXPECT_DEATH(M::transfer(dest - 1, big_src, size * 2),
+               LeftOOBWriteMessage(sizeof(T)));
+  // try to access mem to both sides of src
+  EXPECT_DEATH(M::transfer(big_dest, src - 2, size * 2),
+               LeftOOBReadMessage(2 * sizeof(T)));
+
+  free(src);
+  free(dest);
+  free(big_src);
+  free(big_dest);
+}
+
+class MemCpyWrapper {
+ public:
+  static void* transfer(void *to, const void *from, size_t size) {
+    return Ident(memcpy)(to, from, size);
+  }
+};
+
+TEST(AddressSanitizer, MemCpyOOBTest) {
+  MemTransferOOBTestTemplate<char, MemCpyWrapper>(100);
+  MemTransferOOBTestTemplate<int, MemCpyWrapper>(1024);
+}
+
+class MemMoveWrapper {
+ public:
+  static void* transfer(void *to, const void *from, size_t size) {
+    return Ident(memmove)(to, from, size);
+  }
+};
+
+TEST(AddressSanitizer, MemMoveOOBTest) {
+  MemTransferOOBTestTemplate<char, MemMoveWrapper>(100);
+  MemTransferOOBTestTemplate<int, MemMoveWrapper>(1024);
+}
+
+
+TEST(AddressSanitizer, MemCmpOOBTest) {
+  size_t size = Ident(100);
+  char *s1 = MallocAndMemsetString(size);
+  char *s2 = MallocAndMemsetString(size);
+  // Normal memcmp calls.
+  Ident(memcmp(s1, s2, size));
+  Ident(memcmp(s1 + size - 1, s2 + size - 1, 1));
+  Ident(memcmp(s1 - 1, s2 - 1, 0));
+  // One of the arguments points to unallocated memory.
+  EXPECT_DEATH(Ident(memcmp)(s1 - 1, s2, 1), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(memcmp)(s1, s2 - 1, 1), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(memcmp)(s1 + size, s2, 1), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(memcmp)(s1, s2 + size, 1), RightOOBReadMessage(0));
+  // Hit unallocated memory and die.
+  EXPECT_DEATH(Ident(memcmp)(s1 + 1, s2 + 1, size), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(memcmp)(s1 + size - 1, s2, 2), RightOOBReadMessage(0));
+  // Zero bytes are not terminators and don't prevent an OOB access.
+  s1[size - 1] = '\0';
+  s2[size - 1] = '\0';
+  EXPECT_DEATH(Ident(memcmp)(s1, s2, size + 1), RightOOBReadMessage(0));
+
+  // Even if the buffers differ in the first byte, we still assume that
+  // memcmp may access the whole buffer and thus report the overflow here:
+  s1[0] = 1;
+  s2[0] = 123;
+  EXPECT_DEATH(Ident(memcmp)(s1, s2, size + 1), RightOOBReadMessage(0));
+
+  free(s1);
+  free(s2);
+}
+
+
+
diff --git a/lib/asan/tests/asan_noinst_test.cc b/lib/asan/tests/asan_noinst_test.cc
index 7f9938a..80af7b6 100644
--- a/lib/asan/tests/asan_noinst_test.cc
+++ b/lib/asan/tests/asan_noinst_test.cc
@@ -17,7 +17,6 @@
 #include "asan_mapping.h"
 #include "asan_stack.h"
 #include "asan_test_utils.h"
-#include "sanitizer/asan_interface.h"
 
 #include <assert.h>
 #include <stdio.h>
@@ -25,14 +24,7 @@
 #include <string.h>  // for memset()
 #include <algorithm>
 #include <vector>
-
-// Simple stand-alone pseudorandom number generator.
-// Current algorithm is ANSI C linear congruential PRNG.
-static inline u32 my_rand(u32* state) {
-  return (*state = *state * 1103515245 + 12345) >> 16;
-}
-
-static u32 global_seed = 0;
+#include <limits>
 
 
 TEST(AddressSanitizer, InternalSimpleDeathTest) {
@@ -40,7 +32,7 @@
 }
 
 static void MallocStress(size_t n) {
-  u32 seed = my_rand(&global_seed);
+  u32 seed = my_rand();
   __asan::StackTrace stack1;
   stack1.trace[0] = 0xa123;
   stack1.trace[1] = 0xa456;
@@ -60,20 +52,21 @@
   for (size_t i = 0; i < n; i++) {
     if ((i % 3) == 0) {
       if (vec.empty()) continue;
-      size_t idx = my_rand(&seed) % vec.size();
+      size_t idx = my_rand_r(&seed) % vec.size();
       void *ptr = vec[idx];
       vec[idx] = vec.back();
       vec.pop_back();
-      __asan::asan_free(ptr, &stack1);
+      __asan::asan_free(ptr, &stack1, __asan::FROM_MALLOC);
     } else {
-      size_t size = my_rand(&seed) % 1000 + 1;
-      switch ((my_rand(&seed) % 128)) {
+      size_t size = my_rand_r(&seed) % 1000 + 1;
+      switch ((my_rand_r(&seed) % 128)) {
         case 0: size += 1024; break;
         case 1: size += 2048; break;
         case 2: size += 4096; break;
       }
-      size_t alignment = 1 << (my_rand(&seed) % 10 + 1);
-      char *ptr = (char*)__asan::asan_memalign(alignment, size, &stack2);
+      size_t alignment = 1 << (my_rand_r(&seed) % 10 + 1);
+      char *ptr = (char*)__asan::asan_memalign(alignment, size,
+                                               &stack2, __asan::FROM_MALLOC);
       vec.push_back(ptr);
       ptr[0] = 0;
       ptr[size-1] = 0;
@@ -81,7 +74,7 @@
     }
   }
   for (size_t i = 0; i < vec.size(); i++)
-    __asan::asan_free(vec[i], &stack3);
+    __asan::asan_free(vec[i], &stack3, __asan::FROM_MALLOC);
 }
 
 
@@ -208,7 +201,7 @@
 };
 
 void CompressStackTraceTest(size_t n_iter) {
-  u32 seed = my_rand(&global_seed);
+  u32 seed = my_rand();
   const size_t kNumPcs = ARRAY_SIZE(pc_array);
   u32 compressed[2 * kNumPcs];
 
@@ -216,9 +209,9 @@
     std::random_shuffle(pc_array, pc_array + kNumPcs);
     __asan::StackTrace stack0, stack1;
     stack0.CopyFrom(pc_array, kNumPcs);
-    stack0.size = std::max((size_t)1, (size_t)(my_rand(&seed) % stack0.size));
+    stack0.size = std::max((size_t)1, (size_t)(my_rand_r(&seed) % stack0.size));
     size_t compress_size =
-      std::max((size_t)2, (size_t)my_rand(&seed) % (2 * kNumPcs));
+      std::max((size_t)2, (size_t)my_rand_r(&seed) % (2 * kNumPcs));
     size_t n_frames =
       __asan::StackTrace::CompressStack(&stack0, compressed, compress_size);
     Ident(n_frames);
@@ -262,12 +255,12 @@
 
   const int size = 32;
   void *p = __asan::asan_malloc(size, &stack);
-  __asan::asan_free(p, &stack);
+  __asan::asan_free(p, &stack, __asan::FROM_MALLOC);
   size_t i;
   size_t max_i = 1 << 30;
   for (i = 0; i < max_i; i++) {
     void *p1 = __asan::asan_malloc(size, &stack);
-    __asan::asan_free(p1, &stack);
+    __asan::asan_free(p1, &stack, __asan::FROM_MALLOC);
     if (p1 == p) break;
   }
   // fprintf(stderr, "i=%ld\n", i);
@@ -277,14 +270,14 @@
 
 void *ThreadedQuarantineTestWorker(void *unused) {
   (void)unused;
-  u32 seed = my_rand(&global_seed);
+  u32 seed = my_rand();
   __asan::StackTrace stack;
   stack.trace[0] = 0x890;
   stack.size = 1;
 
   for (size_t i = 0; i < 1000; i++) {
-    void *p = __asan::asan_malloc(1 + (my_rand(&seed) % 4000), &stack);
-    __asan::asan_free(p, &stack);
+    void *p = __asan::asan_malloc(1 + (my_rand_r(&seed) % 4000), &stack);
+    __asan::asan_free(p, &stack, __asan::FROM_MALLOC);
   }
   return NULL;
 }
@@ -315,7 +308,7 @@
       p[i] = __asan::asan_malloc(32, &stack);
     }
     for (size_t i = 0; i < kNumMallocs; i++) {
-      __asan::asan_free(p[i], &stack);
+      __asan::asan_free(p[i], &stack, __asan::FROM_MALLOC);
     }
   }
   return NULL;
@@ -333,11 +326,13 @@
 }
 
 TEST(AddressSanitizer, MemsetWildAddressTest) {
+  using __asan::kHighMemEnd;
   typedef void*(*memset_p)(void*, int, size_t);
   // Prevent inlining of memset().
   volatile memset_p libc_memset = (memset_p)memset;
   EXPECT_DEATH(libc_memset((void*)(kLowShadowBeg + 200), 0, 100),
-               "unknown-crash.*low shadow");
+               (kLowShadowEnd == 0) ? "unknown-crash.*shadow gap"
+                                    : "unknown-crash.*low shadow");
   EXPECT_DEATH(libc_memset((void*)(kShadowGapBeg + 200), 0, 100),
                "unknown-crash.*shadow gap");
   EXPECT_DEATH(libc_memset((void*)(kHighShadowBeg + 200), 0, 100),
@@ -345,7 +340,11 @@
 }
 
 TEST(AddressSanitizerInterface, GetEstimatedAllocatedSize) {
+#if ASAN_ALLOCATOR_VERSION == 1
   EXPECT_EQ(1U, __asan_get_estimated_allocated_size(0));
+#elif ASAN_ALLOCATOR_VERSION == 2
+  EXPECT_EQ(0U, __asan_get_estimated_allocated_size(0));
+#endif
   const size_t sizes[] = { 1, 30, 1<<30 };
   for (size_t i = 0; i < 3; i++) {
     EXPECT_EQ(sizes[i], __asan_get_estimated_allocated_size(sizes[i]));
@@ -385,8 +384,17 @@
   free(array);
   EXPECT_FALSE(__asan_get_ownership(array));
   EXPECT_DEATH(__asan_get_allocated_size(array), kGetAllocatedSizeErrorMsg);
-
   delete int_ptr;
+
+  void *zero_alloc = Ident(malloc(0));
+  if (zero_alloc != 0) {
+    // If malloc(0) is not null, this pointer is owned and should have valid
+    // allocated size.
+    EXPECT_TRUE(__asan_get_ownership(zero_alloc));
+    // Allocated size is 0 or 1 depending on the allocator used.
+    EXPECT_LT(__asan_get_allocated_size(zero_alloc), 2U);
+  }
+  free(zero_alloc);
 }
 
 TEST(AddressSanitizerInterface, GetCurrentAllocatedBytesTest) {
@@ -410,6 +418,7 @@
   delete Ident(x);
 }
 
+#if ASAN_ALLOCATOR_VERSION == 1
 // This test is run in a separate process, so that large malloced
 // chunk won't remain in the free lists after the test.
 // Note: use ASSERT_* instead of EXPECT_* here.
@@ -441,9 +450,26 @@
 TEST(AddressSanitizerInterface, GetHeapSizeTest) {
   EXPECT_DEATH(RunGetHeapSizeTestAndDie(), "double-free");
 }
+#elif ASAN_ALLOCATOR_VERSION == 2
+TEST(AddressSanitizerInterface, GetHeapSizeTest) {
+  // asan_allocator2 does not keep huge chunks in the free list, but unmaps them.
+  // The chunk should be greater than the quarantine size,
+  // otherwise it will be stuck in quarantine instead of being unmapped.
+  static const size_t kLargeMallocSize = 1 << 29;  // 512M
+  uptr old_heap_size = __asan_get_heap_size();
+  for (int i = 0; i < 3; i++) {
+    // fprintf(stderr, "allocating %zu bytes:\n", kLargeMallocSize);
+    free(Ident(malloc(kLargeMallocSize)));
+    EXPECT_EQ(old_heap_size, __asan_get_heap_size());
+  }
+}
+#endif
 
 // Note: use ASSERT_* instead of EXPECT_* here.
 static void DoLargeMallocForGetFreeBytesTestAndDie() {
+#if ASAN_ALLOCATOR_VERSION == 1
+  // asan_allocator2 does not keep large chunks in free_lists, so this test
+  // will not work.
   size_t old_free_bytes, new_free_bytes;
   static const size_t kLargeMallocSize = 1 << 29;  // 512M
   // If we malloc and free a large memory chunk, it will not fall
@@ -455,11 +481,13 @@
   new_free_bytes = __asan_get_free_bytes();
   fprintf(stderr, "free bytes after malloc and free: %zu\n", new_free_bytes);
   ASSERT_GE(new_free_bytes, old_free_bytes + kLargeMallocSize);
+#endif  // ASAN_ALLOCATOR_VERSION
   // Test passed.
   DoDoubleFree();
 }
 
 TEST(AddressSanitizerInterface, GetFreeBytesTest) {
+#if ASAN_ALLOCATOR_VERSION == 1
   // Allocate a small chunk. Now allocator probably has a lot of these
   // chunks to fulfill future requests. So, future requests will decrease
   // the number of free bytes. Do this only on systems where there
@@ -481,10 +509,11 @@
     for (i = 0; i < kNumOfChunks; i++)
       free(chunks[i]);
   }
+#endif
   EXPECT_DEATH(DoLargeMallocForGetFreeBytesTestAndDie(), "double-free");
 }
 
-static const size_t kManyThreadsMallocSizes[] = {5, 1UL<<10, 1UL<<20, 357};
+static const size_t kManyThreadsMallocSizes[] = {5, 1UL<<10, 1UL<<14, 357};
 static const size_t kManyThreadsIterations = 250;
 static const size_t kManyThreadsNumThreads =
   (SANITIZER_WORDSIZE == 32) ? 40 : 200;
@@ -496,6 +525,8 @@
       free(Ident(malloc(kManyThreadsMallocSizes[size_index])));
     }
   }
+  // Just one large allocation.
+  free(Ident(malloc(1 << 20)));
   return 0;
 }
 
@@ -602,6 +633,53 @@
   free(vec);
 }
 
+TEST(AddressSanitizerInterface, GlobalRedzones) {
+  GOOD_ACCESS(glob1, 1 - 1);
+  GOOD_ACCESS(glob2, 2 - 1);
+  GOOD_ACCESS(glob3, 3 - 1);
+  GOOD_ACCESS(glob4, 4 - 1);
+  GOOD_ACCESS(glob5, 5 - 1);
+  GOOD_ACCESS(glob6, 6 - 1);
+  GOOD_ACCESS(glob7, 7 - 1);
+  GOOD_ACCESS(glob8, 8 - 1);
+  GOOD_ACCESS(glob9, 9 - 1);
+  GOOD_ACCESS(glob10, 10 - 1);
+  GOOD_ACCESS(glob11, 11 - 1);
+  GOOD_ACCESS(glob12, 12 - 1);
+  GOOD_ACCESS(glob13, 13 - 1);
+  GOOD_ACCESS(glob14, 14 - 1);
+  GOOD_ACCESS(glob15, 15 - 1);
+  GOOD_ACCESS(glob16, 16 - 1);
+  GOOD_ACCESS(glob17, 17 - 1);
+  GOOD_ACCESS(glob1000, 1000 - 1);
+  GOOD_ACCESS(glob10000, 10000 - 1);
+  GOOD_ACCESS(glob100000, 100000 - 1);
+
+  BAD_ACCESS(glob1, 1);
+  BAD_ACCESS(glob2, 2);
+  BAD_ACCESS(glob3, 3);
+  BAD_ACCESS(glob4, 4);
+  BAD_ACCESS(glob5, 5);
+  BAD_ACCESS(glob6, 6);
+  BAD_ACCESS(glob7, 7);
+  BAD_ACCESS(glob8, 8);
+  BAD_ACCESS(glob9, 9);
+  BAD_ACCESS(glob10, 10);
+  BAD_ACCESS(glob11, 11);
+  BAD_ACCESS(glob12, 12);
+  BAD_ACCESS(glob13, 13);
+  BAD_ACCESS(glob14, 14);
+  BAD_ACCESS(glob15, 15);
+  BAD_ACCESS(glob16, 16);
+  BAD_ACCESS(glob17, 17);
+  BAD_ACCESS(glob1000, 1000);
+  BAD_ACCESS(glob1000, 1100);  // Redzone is at least 101 bytes.
+  BAD_ACCESS(glob10000, 10000);
+  BAD_ACCESS(glob10000, 11000);  // Redzone is at least 1001 bytes.
+  BAD_ACCESS(glob100000, 100000);
+  BAD_ACCESS(glob100000, 110000);  // Redzone is at least 10001 bytes.
+}
+
 // Make sure that each aligned block of size "2^granularity" doesn't have
 // "true" value before "false" value.
 static void MakeShadowValid(bool *shadow, int length, int granularity) {
@@ -655,6 +733,54 @@
   }
 }
 
+TEST(AddressSanitizerInterface, PoisonedRegion) {
+  size_t rz = 16;
+  for (size_t size = 1; size <= 64; size++) {
+    char *p = new char[size];
+    uptr x = reinterpret_cast<uptr>(p);
+    for (size_t beg = 0; beg < size + rz; beg++) {
+      for (size_t end = beg; end < size + rz; end++) {
+        uptr first_poisoned = __asan_region_is_poisoned(x + beg, end - beg);
+        if (beg == end) {
+          EXPECT_FALSE(first_poisoned);
+        } else if (beg < size && end <= size) {
+          EXPECT_FALSE(first_poisoned);
+        } else if (beg >= size) {
+          EXPECT_EQ(x + beg, first_poisoned);
+        } else {
+          EXPECT_GT(end, size);
+          EXPECT_EQ(x + size, first_poisoned);
+        }
+      }
+    }
+    delete [] p;
+  }
+}
+
+// This is a performance benchmark for manual runs.
+// ASan's memset interceptor calls mem_is_zero for the entire shadow region.
+// The profile should look like this:
+//     89.10%   [.] __memset_sse2
+//     10.50%   [.] __sanitizer::mem_is_zero
+// I.e. mem_is_zero should consume ~SHADOW_GRANULARITY times fewer CPU cycles
+// than memset itself.
+TEST(AddressSanitizerInterface, DISABLED_StressLargeMemset) {
+  size_t size = 1 << 20;
+  char *x = new char[size];
+  for (int i = 0; i < 100000; i++)
+    Ident(memset)(x, 0, size);
+  delete [] x;
+}
+
+// Same here, but we run memset with small sizes.
+TEST(AddressSanitizerInterface, DISABLED_StressSmallMemset) {
+  size_t size = 32;
+  char *x = new char[size];
+  for (int i = 0; i < 100000000; i++)
+    Ident(memset)(x, 0, size);
+  delete [] x;
+}
+
 static const char *kInvalidPoisonMessage = "invalid-poison-memory-range";
 static const char *kInvalidUnpoisonMessage = "invalid-unpoison-memory-range";
 
@@ -693,8 +819,12 @@
 TEST(AddressSanitizerInterface, GetOwnershipStressTest) {
   std::vector<char *> pointers;
   std::vector<size_t> sizes;
+#if ASAN_ALLOCATOR_VERSION == 1
   const size_t kNumMallocs =
       (SANITIZER_WORDSIZE <= 32 || ASAN_LOW_MEMORY) ? 1 << 10 : 1 << 14;
+#elif ASAN_ALLOCATOR_VERSION == 2  // too slow with asan_allocator2. :(
+  const size_t kNumMallocs = 1 << 9;
+#endif
   for (size_t i = 0; i < kNumMallocs; i++) {
     size_t size = i * 100 + 1;
     pointers.push_back((char*)malloc(size));
@@ -710,3 +840,38 @@
   for (size_t i = 0, n = pointers.size(); i < n; i++)
     free(pointers[i]);
 }
+
+TEST(AddressSanitizerInterface, CallocOverflow) {
+  size_t kArraySize = 4096;
+  volatile size_t kMaxSizeT = std::numeric_limits<size_t>::max();
+  volatile size_t kArraySize2 = kMaxSizeT / kArraySize + 10;
+  void *p = calloc(kArraySize, kArraySize2);  // Should return 0.
+  EXPECT_EQ(0L, Ident(p));
+}
+
+TEST(AddressSanitizerInterface, CallocOverflow2) {
+#if SANITIZER_WORDSIZE == 32
+  size_t kArraySize = 112;
+  volatile size_t kArraySize2 = 43878406;
+  void *p = calloc(kArraySize, kArraySize2);  // Should return 0.
+  EXPECT_EQ(0L, Ident(p));
+#endif
+}
+
+TEST(AddressSanitizerInterface, CallocReturnsZeroMem) {
+  size_t sizes[] = {16, 1000, 10000, 100000, 2100000};
+  for (size_t s = 0; s < ARRAY_SIZE(sizes); s++) {
+    size_t size = sizes[s];
+    for (size_t iter = 0; iter < 5; iter++) {
+      char *x = Ident((char*)calloc(1, size));
+      EXPECT_EQ(x[0], 0);
+      EXPECT_EQ(x[size - 1], 0);
+      EXPECT_EQ(x[size / 2], 0);
+      EXPECT_EQ(x[size / 3], 0);
+      EXPECT_EQ(x[size / 4], 0);
+      memset(x, 0x42, size);
+      free(Ident(x));
+      free(Ident(malloc(Ident(1 << 27))));  // Try to drain the quarantine.
+    }
+  }
+}
diff --git a/lib/asan/tests/asan_oob_test.cc b/lib/asan/tests/asan_oob_test.cc
new file mode 100644
index 0000000..dbe272c
--- /dev/null
+++ b/lib/asan/tests/asan_oob_test.cc
@@ -0,0 +1,128 @@
+//===-- asan_oob_test.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+#include "asan_test_utils.h"
+
+NOINLINE void asan_write_sized_aligned(uint8_t *p, size_t size) {
+  EXPECT_EQ(0U, ((uintptr_t)p % size));
+  if      (size == 1) asan_write((uint8_t*)p);
+  else if (size == 2) asan_write((uint16_t*)p);
+  else if (size == 4) asan_write((uint32_t*)p);
+  else if (size == 8) asan_write((uint64_t*)p);
+}
+
+template<typename T>
+NOINLINE void oob_test(int size, int off) {
+  char *p = (char*)malloc_aaa(size);
+  // fprintf(stderr, "writing %d byte(s) into [%p,%p) with offset %d\n",
+  //        sizeof(T), p, p + size, off);
+  asan_write((T*)(p + off));
+  free_aaa(p);
+}
+
+// Writes a T just left of, inside, and just right of blocks of several sizes.
+template<typename T>
+void OOBTest() {
+  char expected_str[100];
+  for (int size = sizeof(T); size < 20; size += 5) {
+    for (int i = -5; i < 0; i++) {
+      const char *str = "is located.*%d byte.*to the left";
+      snprintf(expected_str, sizeof(expected_str), str, abs(i));
+      EXPECT_DEATH(oob_test<T>(size, i), expected_str);
+    }
+
+    // Accesses that fit entirely inside the block must not be reported.
+    for (int i = 0; i < (int)(size - sizeof(T) + 1); i++)
+      oob_test<T>(size, i);
+
+    for (int i = size - sizeof(T) + 1; i <= (int)(size + 2 * sizeof(T)); i++) {
+      const char *str = "is located.*%d byte.*to the right";
+      int off = i >= size ? (i - size) : 0;
+      // we don't catch unaligned partially OOB accesses.
+      if (i % sizeof(T)) continue;
+      snprintf(expected_str, sizeof(expected_str), str, off);
+      EXPECT_DEATH(oob_test<T>(size, i), expected_str);
+    }
+  }
+
+  EXPECT_DEATH(oob_test<T>(kLargeMalloc, -1),
+          "is located.*1 byte.*to the left");
+  EXPECT_DEATH(oob_test<T>(kLargeMalloc, kLargeMalloc),
+          "is located.*0 byte.*to the right");
+}
+
+// TODO(glider): the following tests are EXTREMELY slow on Darwin:
+//   AddressSanitizer.OOB_char (125503 ms)
+//   AddressSanitizer.OOB_int (126890 ms)
+//   AddressSanitizer.OOBRightTest (315605 ms)
+//   AddressSanitizer.SimpleStackTest (366559 ms)
+
+TEST(AddressSanitizer, OOB_char) {
+  OOBTest<U1>();
+}
+
+TEST(AddressSanitizer, OOB_int) {
+  OOBTest<U4>();
+}
+
+// Writes of each power-of-two size at aligned offsets into tiny allocations.
+TEST(AddressSanitizer, OOBRightTest) {
+  for (size_t access_size = 1; access_size <= 8; access_size *= 2) {
+    for (size_t alloc_size = 1; alloc_size <= 8; alloc_size++) {
+      for (size_t offset = 0; offset <= 8; offset += access_size) {
+        void *p = malloc(alloc_size);
+        // allocated: [p, p + alloc_size)
+        // accessed:  [p + offset, p + offset + access_size)
+        uint8_t *addr = (uint8_t*)p + offset;
+        if (offset + access_size <= alloc_size) {
+          asan_write_sized_aligned(addr, access_size);
+        } else {
+          int outside_bytes = offset > alloc_size ? (offset - alloc_size) : 0;
+          const char *str = "is located.%d *byte.*to the right";
+          char expected_str[100];
+          snprintf(expected_str, sizeof(expected_str), str, outside_bytes);
+          EXPECT_DEATH(asan_write_sized_aligned(addr, access_size),
+                       expected_str);
+        }
+        free(p);
+      }
+    }
+  }
+}
+
+#if ASAN_ALLOCATOR_VERSION == 2  // Broken with the asan_allocator1
+TEST(AddressSanitizer, LargeOOBRightTest) {
+  size_t large_power_of_two = 1 << 19;
+  for (size_t i = 16; i <= 256; i *= 2) {
+    size_t size = large_power_of_two - i;
+    char *p = Ident(new char[size]);
+    EXPECT_DEATH(p[size] = 0, "is located 0 bytes to the right");
+    delete [] p;
+  }
+}
+#endif  // ASAN_ALLOCATOR_VERSION == 2
+
+TEST(AddressSanitizer, DISABLED_DemoOOBLeftLow) {
+  oob_test<U1>(10, -1);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoOOBLeftHigh) {
+  oob_test<U1>(kLargeMalloc, -1);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoOOBRightLow) {
+  oob_test<U1>(10, 10);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoOOBRightHigh) {
+  oob_test<U1>(kLargeMalloc, kLargeMalloc);
+}
diff --git a/lib/asan/tests/asan_str_test.cc b/lib/asan/tests/asan_str_test.cc
new file mode 100644
index 0000000..128fb61
--- /dev/null
+++ b/lib/asan/tests/asan_str_test.cc
@@ -0,0 +1,572 @@
+//===-- asan_str_test.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+#include "asan_test_utils.h"
+
+// Used for string functions tests
+static char global_string[] = "global";
+static size_t global_string_length = 6;
+
+// Input to a test is a zero-terminated string str with given length
+// Accesses to the bytes to the left and to the right of str
+// are presumed to produce OOB errors
+void StrLenOOBTestTemplate(char *str, size_t length, bool is_global) {
+  // Normal strlen calls
+  EXPECT_EQ(strlen(str), length);
+  if (length > 0) {
+    EXPECT_EQ(length - 1, strlen(str + 1));
+    EXPECT_EQ(0U, strlen(str + length));
+  }
+  // Arg of strlen is not malloced, OOB access
+  if (!is_global) {
+    // We don't insert RedZones to the left of global variables
+    EXPECT_DEATH(Ident(strlen(str - 1)), LeftOOBReadMessage(1));
+    EXPECT_DEATH(Ident(strlen(str - 5)), LeftOOBReadMessage(5));
+  }
+  EXPECT_DEATH(Ident(strlen(str + length + 1)), RightOOBReadMessage(0));
+  // Overwrite terminator
+  str[length] = 'a';
+  // String is not zero-terminated, strlen will lead to OOB access
+  EXPECT_DEATH(Ident(strlen(str)), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(strlen(str + length)), RightOOBReadMessage(0));
+  // Restore terminator
+  str[length] = 0;
+}
+TEST(AddressSanitizer, StrLenOOBTest) {
+  // Check heap-allocated string
+  size_t length = Ident(10);
+  char *heap_string = Ident((char*)malloc(length + 1));
+  char stack_string[10 + 1];
+  break_optimization(&stack_string);
+  for (size_t i = 0; i < length; i++) {
+    heap_string[i] = 'a';
+    stack_string[i] = 'b';
+  }
+  heap_string[length] = 0;
+  stack_string[length] = 0;
+  StrLenOOBTestTemplate(heap_string, length, false);
+  // TODO(samsonov): Fix expected messages in StrLenOOBTestTemplate to
+  //      make test for stack_string work. Or move it to output tests.
+  // StrLenOOBTestTemplate(stack_string, length, false);
+  StrLenOOBTestTemplate(global_string, global_string_length, true);
+  free(heap_string);
+}
+
+#ifndef __APPLE__
+TEST(AddressSanitizer, StrNLenOOBTest) {
+  size_t size = Ident(123);
+  char *str = MallocAndMemsetString(size);
+  // Normal strnlen calls.
+  Ident(strnlen(str - 1, 0));
+  Ident(strnlen(str, size));
+  Ident(strnlen(str + size - 1, 1));
+  str[size - 1] = '\0';
+  Ident(strnlen(str, 2 * size));
+  // Argument points to not allocated memory.
+  EXPECT_DEATH(Ident(strnlen(str - 1, 1)), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(strnlen(str + size, 1)), RightOOBReadMessage(0));
+  // Overwrite the terminating '\0' and hit unallocated memory.
+  str[size - 1] = 'z';
+  EXPECT_DEATH(Ident(strnlen(str, size + 1)), RightOOBReadMessage(0));
+  free(str);
+}
+#endif
+
+TEST(AddressSanitizer, StrDupOOBTest) {
+  size_t size = Ident(42);
+  char *str = MallocAndMemsetString(size);
+  char *new_str;
+  // Normal strdup calls.
+  str[size - 1] = '\0';
+  new_str = strdup(str);
+  free(new_str);
+  new_str = strdup(str + size - 1);
+  free(new_str);
+  // Argument points to not allocated memory.
+  EXPECT_DEATH(Ident(strdup(str - 1)), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(strdup(str + size)), RightOOBReadMessage(0));
+  // Overwrite the terminating '\0' and hit unallocated memory.
+  str[size - 1] = 'z';
+  EXPECT_DEATH(Ident(strdup(str)), RightOOBReadMessage(0));
+  free(str);
+}
+
+TEST(AddressSanitizer, StrCpyOOBTest) {
+  size_t to_size = Ident(30);
+  size_t from_size = Ident(6);  // less than to_size
+  char *to = Ident((char*)malloc(to_size));
+  char *from = Ident((char*)malloc(from_size));
+  // Normal strcpy calls.
+  strcpy(from, "hello");
+  strcpy(to, from);
+  strcpy(to + to_size - from_size, from);
+  // Length of "from" is too small.
+  EXPECT_DEATH(Ident(strcpy(from, "hello2")), RightOOBWriteMessage(0));
+  // "to" or "from" points to not allocated memory.
+  EXPECT_DEATH(Ident(strcpy(to - 1, from)), LeftOOBWriteMessage(1));
+  EXPECT_DEATH(Ident(strcpy(to, from - 1)), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(strcpy(to, from + from_size)), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(strcpy(to + to_size, from)), RightOOBWriteMessage(0));
+  // Overwrite the terminating '\0' character and hit unallocated memory.
+  from[from_size - 1] = '!';
+  EXPECT_DEATH(Ident(strcpy(to, from)), RightOOBReadMessage(0));
+  free(to);
+  free(from);
+}
+
+TEST(AddressSanitizer, StrNCpyOOBTest) {
+  size_t to_size = Ident(20);
+  size_t from_size = Ident(6);  // less than to_size
+  char *to = Ident((char*)malloc(to_size));
+  // From is a zero-terminated string "hello\0" of length 6
+  char *from = Ident((char*)malloc(from_size));
+  strcpy(from, "hello");
+  // copy 0 bytes
+  strncpy(to, from, 0);
+  strncpy(to - 1, from - 1, 0);
+  // normal strncpy calls
+  strncpy(to, from, from_size);
+  strncpy(to, from, to_size);
+  strncpy(to, from + from_size - 1, to_size);
+  strncpy(to + to_size - 1, from, 1);
+  // One of {to, from} points to not allocated memory
+  EXPECT_DEATH(Ident(strncpy(to, from - 1, from_size)),
+               LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(strncpy(to - 1, from, from_size)),
+               LeftOOBWriteMessage(1));
+  EXPECT_DEATH(Ident(strncpy(to, from + from_size, 1)),
+               RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(strncpy(to + to_size, from, 1)),
+               RightOOBWriteMessage(0));
+  // Length of "to" is too small
+  EXPECT_DEATH(Ident(strncpy(to + to_size - from_size + 1, from, from_size)),
+               RightOOBWriteMessage(0));
+  EXPECT_DEATH(Ident(strncpy(to + 1, from, to_size)),
+               RightOOBWriteMessage(0));
+  // Overwrite terminator in from
+  from[from_size - 1] = '!';
+  // normal strncpy call
+  strncpy(to, from, from_size);
+  // Length of "from" is too small
+  EXPECT_DEATH(Ident(strncpy(to, from, to_size)),
+               RightOOBReadMessage(0));
+  free(to);
+  free(from);
+}
+
+// Users may have different definitions of "strchr" and "index", so provide
+// function pointer typedefs and overload RunStrChrTest implementation.
+// We can't use macro for RunStrChrTest body here, as this macro would
+// confuse EXPECT_DEATH gtest macro.
+typedef char*(*PointerToStrChr1)(const char*, int);
+typedef char*(*PointerToStrChr2)(char*, int);
+
+USED static void RunStrChrTest(PointerToStrChr1 StrChr) {
+  size_t size = Ident(100);
+  char *str = MallocAndMemsetString(size);
+  str[10] = 'q';
+  str[11] = '\0';
+  EXPECT_EQ(str, StrChr(str, 'z'));
+  EXPECT_EQ(str + 10, StrChr(str, 'q'));
+  EXPECT_EQ(NULL, StrChr(str, 'a'));
+  // StrChr argument points to not allocated memory.
+  EXPECT_DEATH(Ident(StrChr(str - 1, 'z')), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(StrChr(str + size, 'z')), RightOOBReadMessage(0));
+  // Overwrite the terminator and hit not allocated memory.
+  str[11] = 'z';
+  EXPECT_DEATH(Ident(StrChr(str, 'a')), RightOOBReadMessage(0));
+  free(str);
+}
+USED static void RunStrChrTest(PointerToStrChr2 StrChr) {
+  size_t size = Ident(100);
+  char *str = MallocAndMemsetString(size);
+  str[10] = 'q';
+  str[11] = '\0';
+  EXPECT_EQ(str, StrChr(str, 'z'));
+  EXPECT_EQ(str + 10, StrChr(str, 'q'));
+  EXPECT_EQ(NULL, StrChr(str, 'a'));
+  // StrChr argument points to not allocated memory.
+  EXPECT_DEATH(Ident(StrChr(str - 1, 'z')), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(StrChr(str + size, 'z')), RightOOBReadMessage(0));
+  // Overwrite the terminator and hit not allocated memory.
+  str[11] = 'z';
+  EXPECT_DEATH(Ident(StrChr(str, 'a')), RightOOBReadMessage(0));
+  free(str);
+}
+
+TEST(AddressSanitizer, StrChrAndIndexOOBTest) {
+  RunStrChrTest(&strchr);
+  RunStrChrTest(&index);
+}
+
+TEST(AddressSanitizer, StrCmpAndFriendsLogicTest) {
+  // strcmp
+  EXPECT_EQ(0, strcmp("", ""));
+  EXPECT_EQ(0, strcmp("abcd", "abcd"));
+  EXPECT_GT(0, strcmp("ab", "ac"));
+  EXPECT_GT(0, strcmp("abc", "abcd"));
+  EXPECT_LT(0, strcmp("acc", "abc"));
+  EXPECT_LT(0, strcmp("abcd", "abc"));
+
+  // strncmp
+  EXPECT_EQ(0, strncmp("a", "b", 0));
+  EXPECT_EQ(0, strncmp("abcd", "abcd", 10));
+  EXPECT_EQ(0, strncmp("abcd", "abcef", 3));
+  EXPECT_GT(0, strncmp("abcde", "abcfa", 4));
+  EXPECT_GT(0, strncmp("a", "b", 5));
+  EXPECT_GT(0, strncmp("bc", "bcde", 4));
+  EXPECT_LT(0, strncmp("xyz", "xyy", 10));
+  EXPECT_LT(0, strncmp("baa", "aaa", 1));
+  EXPECT_LT(0, strncmp("zyx", "", 2));
+
+  // strcasecmp
+  EXPECT_EQ(0, strcasecmp("", ""));
+  EXPECT_EQ(0, strcasecmp("zzz", "zzz"));
+  EXPECT_EQ(0, strcasecmp("abCD", "ABcd"));
+  EXPECT_GT(0, strcasecmp("aB", "Ac"));
+  EXPECT_GT(0, strcasecmp("ABC", "ABCd"));
+  EXPECT_LT(0, strcasecmp("acc", "abc"));
+  EXPECT_LT(0, strcasecmp("ABCd", "abc"));
+
+  // strncasecmp
+  EXPECT_EQ(0, strncasecmp("a", "b", 0));
+  EXPECT_EQ(0, strncasecmp("abCD", "ABcd", 10));
+  EXPECT_EQ(0, strncasecmp("abCd", "ABcef", 3));
+  EXPECT_GT(0, strncasecmp("abcde", "ABCfa", 4));
+  EXPECT_GT(0, strncasecmp("a", "B", 5));
+  EXPECT_GT(0, strncasecmp("bc", "BCde", 4));
+  EXPECT_LT(0, strncasecmp("xyz", "xyy", 10));
+  EXPECT_LT(0, strncasecmp("Baa", "aaa", 1));
+  EXPECT_LT(0, strncasecmp("zyx", "", 2));
+
+  // memcmp
+  EXPECT_EQ(0, memcmp("a", "b", 0));
+  EXPECT_EQ(0, memcmp("ab\0c", "ab\0c", 4));
+  EXPECT_GT(0, memcmp("\0ab", "\0ac", 3));
+  EXPECT_GT(0, memcmp("abb\0", "abba", 4));
+  EXPECT_LT(0, memcmp("ab\0cd", "ab\0c\0", 5));
+  EXPECT_LT(0, memcmp("zza", "zyx", 3));
+}
+
+typedef int(*PointerToStrCmp)(const char*, const char*);
+void RunStrCmpTest(PointerToStrCmp StrCmp) {
+  size_t size = Ident(100);
+  int fill = 'o';
+  char *s1 = MallocAndMemsetString(size, fill);
+  char *s2 = MallocAndMemsetString(size, fill);
+  s1[size - 1] = '\0';
+  s2[size - 1] = '\0';
+  // Normal StrCmp calls
+  Ident(StrCmp(s1, s2));
+  Ident(StrCmp(s1, s2 + size - 1));
+  Ident(StrCmp(s1 + size - 1, s2 + size - 1));
+  s1[size - 1] = 'z';
+  s2[size - 1] = 'x';
+  Ident(StrCmp(s1, s2));
+  // One of arguments points to not allocated memory.
+  EXPECT_DEATH(Ident(StrCmp)(s1 - 1, s2), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(StrCmp)(s1, s2 - 1), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(StrCmp)(s1 + size, s2), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(StrCmp)(s1, s2 + size), RightOOBReadMessage(0));
+  // Hit unallocated memory and die.
+  s1[size - 1] = fill;
+  EXPECT_DEATH(Ident(StrCmp)(s1, s1), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(StrCmp)(s1 + size - 1, s2), RightOOBReadMessage(0));
+  free(s1);
+  free(s2);
+}
+
+TEST(AddressSanitizer, StrCmpOOBTest) {
+  RunStrCmpTest(&strcmp);
+}
+
+TEST(AddressSanitizer, StrCaseCmpOOBTest) {
+  RunStrCmpTest(&strcasecmp);
+}
+
+typedef int(*PointerToStrNCmp)(const char*, const char*, size_t);
+void RunStrNCmpTest(PointerToStrNCmp StrNCmp) {
+  size_t size = Ident(100);
+  char *s1 = MallocAndMemsetString(size);
+  char *s2 = MallocAndMemsetString(size);
+  s1[size - 1] = '\0';
+  s2[size - 1] = '\0';
+  // Normal StrNCmp calls
+  Ident(StrNCmp(s1, s2, size + 2));
+  s1[size - 1] = 'z';
+  s2[size - 1] = 'x';
+  Ident(StrNCmp(s1 + size - 2, s2 + size - 2, size));
+  s2[size - 1] = 'z';
+  Ident(StrNCmp(s1 - 1, s2 - 1, 0));
+  Ident(StrNCmp(s1 + size - 1, s2 + size - 1, 1));
+  // One of arguments points to not allocated memory.
+  EXPECT_DEATH(Ident(StrNCmp)(s1 - 1, s2, 1), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(StrNCmp)(s1, s2 - 1, 1), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(StrNCmp)(s1 + size, s2, 1), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(StrNCmp)(s1, s2 + size, 1), RightOOBReadMessage(0));
+  // Hit unallocated memory and die.
+  EXPECT_DEATH(Ident(StrNCmp)(s1 + 1, s2 + 1, size), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(StrNCmp)(s1 + size - 1, s2, 2), RightOOBReadMessage(0));
+  free(s1);
+  free(s2);
+}
+
+TEST(AddressSanitizer, StrNCmpOOBTest) {
+  RunStrNCmpTest(&strncmp);
+}
+
+TEST(AddressSanitizer, StrNCaseCmpOOBTest) {
+  RunStrNCmpTest(&strncasecmp);
+}
+TEST(AddressSanitizer, StrCatOOBTest) {
+  // strcat() reads strlen(to) bytes from |to| before concatenating.
+  size_t to_size = Ident(100);
+  char *to = MallocAndMemsetString(to_size);
+  to[0] = '\0';
+  size_t from_size = Ident(20);
+  char *from = MallocAndMemsetString(from_size);
+  from[from_size - 1] = '\0';
+  // Normal strcat calls.
+  strcat(to, from);
+  strcat(to, from);
+  strcat(to + from_size, from + from_size - 2);
+  // Passing an invalid pointer is an error even when concatenating an empty
+  // string.
+  EXPECT_DEATH(strcat(to - 1, from + from_size - 1), LeftOOBAccessMessage(1));
+  // One of arguments points to not allocated memory.
+  EXPECT_DEATH(strcat(to - 1, from), LeftOOBAccessMessage(1));
+  EXPECT_DEATH(strcat(to, from - 1), LeftOOBReadMessage(1));
+  EXPECT_DEATH(strcat(to + to_size, from), RightOOBWriteMessage(0));
+  EXPECT_DEATH(strcat(to, from + from_size), RightOOBReadMessage(0));
+
+  // "from" is not zero-terminated.
+  from[from_size - 1] = 'z';
+  EXPECT_DEATH(strcat(to, from), RightOOBReadMessage(0));
+  from[from_size - 1] = '\0';
+  // "to" is not zero-terminated.
+  memset(to, 'z', to_size);
+  EXPECT_DEATH(strcat(to, from), RightOOBWriteMessage(0));
+  // "to" is too short to fit "from".
+  to[to_size - from_size + 1] = '\0';
+  EXPECT_DEATH(strcat(to, from), RightOOBWriteMessage(0));
+  // length of "to" is just enough.
+  strcat(to, from + 1);
+
+  free(to);
+  free(from);
+}
+
+TEST(AddressSanitizer, StrNCatOOBTest) {
+  // strncat() reads strlen(to) bytes from |to| before concatenating.
+  size_t to_size = Ident(100);
+  char *to = MallocAndMemsetString(to_size);
+  to[0] = '\0';
+  size_t from_size = Ident(20);
+  char *from = MallocAndMemsetString(from_size);
+  // Normal strncat calls.
+  strncat(to, from, 0);
+  strncat(to, from, from_size);
+  from[from_size - 1] = '\0';
+  strncat(to, from, 2 * from_size);
+  // Catenating empty string with an invalid string is still an error.
+  EXPECT_DEATH(strncat(to - 1, from, 0), LeftOOBAccessMessage(1));
+  strncat(to, from + from_size - 1, 10);
+  // One of arguments points to not allocated memory.
+  EXPECT_DEATH(strncat(to - 1, from, 2), LeftOOBAccessMessage(1));
+  EXPECT_DEATH(strncat(to, from - 1, 2), LeftOOBReadMessage(1));
+  EXPECT_DEATH(strncat(to + to_size, from, 2), RightOOBWriteMessage(0));
+  EXPECT_DEATH(strncat(to, from + from_size, 2), RightOOBReadMessage(0));
+
+  memset(from, 'z', from_size);
+  memset(to, 'z', to_size);
+  to[0] = '\0';
+  // "from" is too short.
+  EXPECT_DEATH(strncat(to, from, from_size + 1), RightOOBReadMessage(0));
+  // "to" is not zero-terminated.
+  EXPECT_DEATH(strncat(to + 1, from, 1), RightOOBWriteMessage(0));
+  // "to" is too short to fit "from".
+  to[0] = 'z';
+  to[to_size - from_size + 1] = '\0';
+  EXPECT_DEATH(strncat(to, from, from_size - 1), RightOOBWriteMessage(0));
+  // "to" is just enough.
+  strncat(to, from, from_size - 2);
+
+  free(to);
+  free(from);
+}
+
+static string OverlapErrorMessage(const string &func) {
+  return func + "-param-overlap";
+}
+
+TEST(AddressSanitizer, StrArgsOverlapTest) {
+  size_t size = Ident(100);
+  char *str = Ident((char*)malloc(size));
+
+// Do not check memcpy() on OS X 10.7 and later, where it actually aliases
+// memmove().
+#if !defined(__APPLE__) || !defined(MAC_OS_X_VERSION_10_7) || \
+    (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7)
+  // Check "memcpy". Use Ident() to avoid inlining.
+  memset(str, 'z', size);
+  Ident(memcpy)(str + 1, str + 11, 10);
+  Ident(memcpy)(str, str, 0);
+  EXPECT_DEATH(Ident(memcpy)(str, str + 14, 15), OverlapErrorMessage("memcpy"));
+  EXPECT_DEATH(Ident(memcpy)(str + 14, str, 15), OverlapErrorMessage("memcpy"));
+#endif
+
+  // We do not treat memcpy with to==from as a bug.
+  // See http://llvm.org/bugs/show_bug.cgi?id=11763.
+  // EXPECT_DEATH(Ident(memcpy)(str + 20, str + 20, 1),
+  //              OverlapErrorMessage("memcpy"));
+
+  // Check "strcpy".
+  memset(str, 'z', size);
+  str[9] = '\0';
+  strcpy(str + 10, str);
+  EXPECT_DEATH(strcpy(str + 9, str), OverlapErrorMessage("strcpy"));
+  EXPECT_DEATH(strcpy(str, str + 4), OverlapErrorMessage("strcpy"));
+  strcpy(str, str + 5);
+
+  // Check "strncpy".
+  memset(str, 'z', size);
+  strncpy(str, str + 10, 10);
+  EXPECT_DEATH(strncpy(str, str + 9, 10), OverlapErrorMessage("strncpy"));
+  EXPECT_DEATH(strncpy(str + 9, str, 10), OverlapErrorMessage("strncpy"));
+  str[10] = '\0';
+  strncpy(str + 11, str, 20);
+  EXPECT_DEATH(strncpy(str + 10, str, 20), OverlapErrorMessage("strncpy"));
+
+  // Check "strcat".
+  memset(str, 'z', size);
+  str[10] = '\0';
+  str[20] = '\0';
+  strcat(str, str + 10);
+  EXPECT_DEATH(strcat(str, str + 11), OverlapErrorMessage("strcat"));
+  str[10] = '\0';
+  strcat(str + 11, str);
+  EXPECT_DEATH(strcat(str, str + 9), OverlapErrorMessage("strcat"));
+  EXPECT_DEATH(strcat(str + 9, str), OverlapErrorMessage("strcat"));
+  EXPECT_DEATH(strcat(str + 10, str), OverlapErrorMessage("strcat"));
+
+  // Check "strncat".
+  memset(str, 'z', size);
+  str[10] = '\0';
+  strncat(str, str + 10, 10);  // from is empty
+  EXPECT_DEATH(strncat(str, str + 11, 10), OverlapErrorMessage("strncat"));
+  str[10] = '\0';
+  str[20] = '\0';
+  strncat(str + 5, str, 5);
+  str[10] = '\0';
+  EXPECT_DEATH(strncat(str + 5, str, 6), OverlapErrorMessage("strncat"));
+  EXPECT_DEATH(strncat(str, str + 9, 10), OverlapErrorMessage("strncat"));
+
+  free(str);
+}
+
+void CallAtoi(const char *nptr) {
+  Ident(atoi(nptr));
+}
+void CallAtol(const char *nptr) {
+  Ident(atol(nptr));
+}
+void CallAtoll(const char *nptr) {
+  Ident(atoll(nptr));
+}
+typedef void(*PointerToCallAtoi)(const char*);
+
+void RunAtoiOOBTest(PointerToCallAtoi Atoi) {
+  char *array = MallocAndMemsetString(10, '1');
+  // Invalid pointer to the string.
+  EXPECT_DEATH(Atoi(array + 11), RightOOBReadMessage(1));
+  EXPECT_DEATH(Atoi(array - 1), LeftOOBReadMessage(1));
+  // Die if a buffer doesn't have a terminating NUL.
+  EXPECT_DEATH(Atoi(array), RightOOBReadMessage(0));
+  // Make the last character a terminating NUL or another non-digit.
+  array[9] = '\0';
+  Atoi(array);
+  array[9] = 'a';
+  Atoi(array);
+  Atoi(array + 9);
+  // Even without digits, skipping spaces and a sign can run off the buffer.
+  memset(array, ' ', 10);
+  EXPECT_DEATH(Atoi(array), RightOOBReadMessage(0));
+  array[9] = '-';
+  EXPECT_DEATH(Atoi(array), RightOOBReadMessage(0));
+  EXPECT_DEATH(Atoi(array + 9), RightOOBReadMessage(0));
+  array[8] = '-';
+  Atoi(array);
+  free(array);
+}
+
+TEST(AddressSanitizer, AtoiAndFriendsOOBTest) {
+  RunAtoiOOBTest(&CallAtoi);
+  RunAtoiOOBTest(&CallAtol);
+  RunAtoiOOBTest(&CallAtoll);
+}
+
+void CallStrtol(const char *nptr, char **endptr, int base) {
+  Ident(strtol(nptr, endptr, base));
+}
+void CallStrtoll(const char *nptr, char **endptr, int base) {
+  Ident(strtoll(nptr, endptr, base));
+}
+typedef void(*PointerToCallStrtol)(const char*, char**, int);
+
+// Runs OOB and endptr checks against a strtol-style wrapper |Strtol|.
+void RunStrtolOOBTest(PointerToCallStrtol Strtol) {
+  char *array = MallocAndMemsetString(3);
+  char *endptr = NULL;
+  array[0] = '1'; array[1] = '2'; array[2] = '3';
+  // Invalid pointer to the string.
+  EXPECT_DEATH(Strtol(array + 3, NULL, 0), RightOOBReadMessage(0));
+  EXPECT_DEATH(Strtol(array - 1, NULL, 0), LeftOOBReadMessage(1));
+  // Buffer overflow if there is no terminating null (depends on base).
+  Strtol(array, &endptr, 3);
+  EXPECT_EQ(array + 2, endptr);
+  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBReadMessage(0));
+  array[2] = 'z';
+  Strtol(array, &endptr, 35);
+  EXPECT_EQ(array + 2, endptr);
+  EXPECT_DEATH(Strtol(array, NULL, 36), RightOOBReadMessage(0));
+  // Add terminating zero to get rid of overflow.
+  array[2] = '\0';
+  Strtol(array, NULL, 36);
+  // Don't check for overflow if base is invalid.
+  Strtol(array - 1, NULL, -1);
+  Strtol(array + 3, NULL, 1);
+  // Sometimes we need to detect overflow if no digits are found.
+  array[0] = array[1] = array[2] = ' ';
+  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBReadMessage(0));
+  array[2] = '+';
+  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBReadMessage(0));
+  array[2] = '-';
+  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBReadMessage(0));
+  array[1] = '+';
+  Strtol(array, NULL, 0);
+  array[1] = array[2] = 'z';
+  Strtol(array, &endptr, 0);
+  EXPECT_EQ(array, endptr);
+  // With nothing to convert, endptr must point at the start of the input.
+  Strtol(array + 2, &endptr, 0);
+  EXPECT_EQ(array + 2, endptr);
+  free(array);
+}
+
+TEST(AddressSanitizer, StrtollOOBTest) {
+  RunStrtolOOBTest(&CallStrtoll);
+}
+TEST(AddressSanitizer, StrtolOOBTest) {
+  RunStrtolOOBTest(&CallStrtol);
+}
+
+
diff --git a/lib/asan/tests/asan_test.cc b/lib/asan/tests/asan_test.cc
index 7bb6e29..1096c2e 100644
--- a/lib/asan/tests/asan_test.cc
+++ b/lib/asan/tests/asan_test.cc
@@ -10,72 +10,8 @@
 // This file is a part of AddressSanitizer, an address sanity checker.
 //
 //===----------------------------------------------------------------------===//
-#include <stdio.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-#include <pthread.h>
-#include <stdint.h>
-#include <setjmp.h>
-#include <assert.h>
-
-#ifdef __linux__
-# include <sys/prctl.h>
-#endif
-
-#if defined(__i386__) || defined(__x86_64__)
-#include <emmintrin.h>
-#endif
-
 #include "asan_test_utils.h"
 
-#ifndef __APPLE__
-#include <malloc.h>
-#else
-#include <malloc/malloc.h>
-#include <AvailabilityMacros.h>  // For MAC_OS_X_VERSION_*
-#include <CoreFoundation/CFString.h>
-#endif  // __APPLE__
-
-#if ASAN_HAS_EXCEPTIONS
-# define ASAN_THROW(x) throw (x)
-#else
-# define ASAN_THROW(x)
-#endif
-
-#include <sys/mman.h>
-
-typedef uint8_t   U1;
-typedef uint16_t  U2;
-typedef uint32_t  U4;
-typedef uint64_t  U8;
-
-static const int kPageSize = 4096;
-
-// Simple stand-alone pseudorandom number generator.
-// Current algorithm is ANSI C linear congruential PRNG.
-static inline uint32_t my_rand(uint32_t* state) {
-  return (*state = *state * 1103515245 + 12345) >> 16;
-}
-
-static uint32_t global_seed = 0;
-
-const size_t kLargeMalloc = 1 << 24;
-
-template<typename T>
-NOINLINE void asan_write(T *a) {
-  *a = 0;
-}
-
-NOINLINE void asan_write_sized_aligned(uint8_t *p, size_t size) {
-  EXPECT_EQ(0U, ((uintptr_t)p % size));
-  if      (size == 1) asan_write((uint8_t*)p);
-  else if (size == 2) asan_write((uint16_t*)p);
-  else if (size == 4) asan_write((uint32_t*)p);
-  else if (size == 8) asan_write((uint64_t*)p);
-}
-
 NOINLINE void *malloc_fff(size_t size) {
   void *res = malloc/**/(size); break_optimization(0); return res;}
 NOINLINE void *malloc_eee(size_t size) {
@@ -109,15 +45,6 @@
 NOINLINE void free_bbb(void *p) { free_ccc(p); break_optimization(0);}
 NOINLINE void free_aaa(void *p) { free_bbb(p); break_optimization(0);}
 
-template<typename T>
-NOINLINE void oob_test(int size, int off) {
-  char *p = (char*)malloc_aaa(size);
-  // fprintf(stderr, "writing %d byte(s) into [%p,%p) with offset %d\n",
-  //        sizeof(T), p, p + size, off);
-  asan_write((T*)(p + off));
-  free_aaa(p);
-}
-
 
 template<typename T>
 NOINLINE void uaf_test(int size, int off) {
@@ -234,76 +161,6 @@
   pthread_key_delete(test_key);
 }
 
-template<typename T>
-void OOBTest() {
-  char expected_str[100];
-  for (int size = sizeof(T); size < 20; size += 5) {
-    for (int i = -5; i < 0; i++) {
-      const char *str =
-          "is located.*%d byte.*to the left";
-      sprintf(expected_str, str, abs(i));
-      EXPECT_DEATH(oob_test<T>(size, i), expected_str);
-    }
-
-    for (int i = 0; i < (int)(size - sizeof(T) + 1); i++)
-      oob_test<T>(size, i);
-
-    for (int i = size - sizeof(T) + 1; i <= (int)(size + 3 * sizeof(T)); i++) {
-      const char *str =
-          "is located.*%d byte.*to the right";
-      int off = i >= size ? (i - size) : 0;
-      // we don't catch unaligned partially OOB accesses.
-      if (i % sizeof(T)) continue;
-      sprintf(expected_str, str, off);
-      EXPECT_DEATH(oob_test<T>(size, i), expected_str);
-    }
-  }
-
-  EXPECT_DEATH(oob_test<T>(kLargeMalloc, -1),
-          "is located.*1 byte.*to the left");
-  EXPECT_DEATH(oob_test<T>(kLargeMalloc, kLargeMalloc),
-          "is located.*0 byte.*to the right");
-}
-
-// TODO(glider): the following tests are EXTREMELY slow on Darwin:
-//   AddressSanitizer.OOB_char (125503 ms)
-//   AddressSanitizer.OOB_int (126890 ms)
-//   AddressSanitizer.OOBRightTest (315605 ms)
-//   AddressSanitizer.SimpleStackTest (366559 ms)
-
-TEST(AddressSanitizer, OOB_char) {
-  OOBTest<U1>();
-}
-
-TEST(AddressSanitizer, OOB_int) {
-  OOBTest<U4>();
-}
-
-TEST(AddressSanitizer, OOBRightTest) {
-  for (size_t access_size = 1; access_size <= 8; access_size *= 2) {
-    for (size_t alloc_size = 1; alloc_size <= 8; alloc_size++) {
-      for (size_t offset = 0; offset <= 8; offset += access_size) {
-        void *p = malloc(alloc_size);
-        // allocated: [p, p + alloc_size)
-        // accessed:  [p + offset, p + offset + access_size)
-        uint8_t *addr = (uint8_t*)p + offset;
-        if (offset + access_size <= alloc_size) {
-          asan_write_sized_aligned(addr, access_size);
-        } else {
-          int outside_bytes = offset > alloc_size ? (offset - alloc_size) : 0;
-          const char *str =
-              "is located.%d *byte.*to the right";
-          char expected_str[100];
-          sprintf(expected_str, str, outside_bytes);
-          EXPECT_DEATH(asan_write_sized_aligned(addr, access_size),
-                       expected_str);
-        }
-        free(p);
-      }
-    }
-  }
-}
-
 TEST(AddressSanitizer, UAF_char) {
   const char *uaf_string = "AddressSanitizer:.*heap-use-after-free";
   EXPECT_DEATH(uaf_test<U1>(1, 0), uaf_string);
@@ -313,6 +170,27 @@
   EXPECT_DEATH(uaf_test<U1>(kLargeMalloc, kLargeMalloc / 2), uaf_string);
 }
 
+TEST(AddressSanitizer, UAF_long_double) {
+  if (sizeof(long double) == sizeof(double)) return;
+  long double *p = Ident(new long double[10]);
+  EXPECT_DEATH(Ident(p)[12] = 0, "WRITE of size 10");
+  EXPECT_DEATH(Ident(p)[0] = Ident(p)[12], "READ of size 10");
+  delete [] Ident(p);
+}
+
+struct Packed5 {
+  int x;
+  char c;
+} __attribute__((packed));
+
+
+TEST(AddressSanitizer, UAF_Packed5) {
+  Packed5 *p = Ident(new Packed5[2]);
+  EXPECT_DEATH(p[0] = p[3], "READ of size 5");
+  EXPECT_DEATH(p[3] = p[0], "WRITE of size 5");
+  delete [] Ident(p);
+}
+
 #if ASAN_HAS_BLACKLIST
 TEST(AddressSanitizer, IgnoreTest) {
   int *x = Ident(new int);
@@ -400,21 +278,21 @@
 #endif
 
 static void MallocStress(size_t n) {
-  uint32_t seed = my_rand(&global_seed);
+  uint32_t seed = my_rand();
   for (size_t iter = 0; iter < 10; iter++) {
     vector<void *> vec;
     for (size_t i = 0; i < n; i++) {
       if ((i % 3) == 0) {
         if (vec.empty()) continue;
-        size_t idx = my_rand(&seed) % vec.size();
+        size_t idx = my_rand_r(&seed) % vec.size();
         void *ptr = vec[idx];
         vec[idx] = vec.back();
         vec.pop_back();
         free_aaa(ptr);
       } else {
-        size_t size = my_rand(&seed) % 1000 + 1;
+        size_t size = my_rand_r(&seed) % 1000 + 1;
 #ifndef __APPLE__
-        size_t alignment = 1 << (my_rand(&seed) % 7 + 3);
+        size_t alignment = 1 << (my_rand_r(&seed) % 7 + 3);
         char *ptr = (char*)memalign_aaa(alignment, size);
 #else
         char *ptr = (char*) malloc_aaa(size);
@@ -462,6 +340,24 @@
 }
 #endif
 
+#ifndef __APPLE__
+void MemalignRun(size_t align, size_t size, int idx) {
+  char *p = (char *)memalign(align, size);
+  Ident(p)[idx] = 0;
+  free(p);
+}
+
+TEST(AddressSanitizer, memalign) {
+  for (int align = 16; align <= (1 << 23); align *= 2) {
+    size_t size = align * 5;
+    EXPECT_DEATH(MemalignRun(align, size, -1),
+                 "is located 1 bytes to the left");
+    EXPECT_DEATH(MemalignRun(align, size, size + 1),
+                 "is located 1 bytes to the right");
+  }
+}
+#endif
+
 TEST(AddressSanitizer, ThreadedMallocStressTest) {
   const int kNumThreads = 4;
   const int kNumIterations = (ASAN_LOW_MEMORY) ? 10000 : 100000;
@@ -502,9 +398,36 @@
   ptr[3] = 3;
   for (int i = 0; i < 10000; i++) {
     ptr = (int*)realloc(ptr,
-        (my_rand(&global_seed) % 1000 + kMinElem) * sizeof(int));
+        (my_rand() % 1000 + kMinElem) * sizeof(int));
     EXPECT_EQ(3, ptr[3]);
   }
+  free(ptr);
+  // Realloc pointer returned by malloc(0).
+  int *ptr2 = Ident((int*)malloc(0));
+  ptr2 = Ident((int*)realloc(ptr2, sizeof(*ptr2)));
+  *ptr2 = 42;
+  EXPECT_EQ(42, *ptr2);
+  free(ptr2);
+}
+
+TEST(AddressSanitizer, ZeroSizeMallocTest) {
+  // Test that malloc(0) and similar functions don't return NULL.
+  void *ptr = Ident(malloc(0));
+  EXPECT_TRUE(NULL != ptr);
+  free(ptr);
+#if !defined(__APPLE__) && !defined(ANDROID) && !defined(__ANDROID__)
+  int pm_res = posix_memalign(&ptr, 1<<20, 0);
+  EXPECT_EQ(0, pm_res);
+  EXPECT_TRUE(NULL != ptr);
+  free(ptr);
+#endif
+  int *int_ptr = new int[0];
+  int *int_ptr2 = new int[0];
+  EXPECT_TRUE(NULL != int_ptr);
+  EXPECT_TRUE(NULL != int_ptr2);
+  EXPECT_NE(int_ptr, int_ptr2);
+  delete[] int_ptr;
+  delete[] int_ptr2;
 }
 
 #ifndef __APPLE__
@@ -790,785 +713,87 @@
 }
 #endif
 
-static string RightOOBErrorMessage(int oob_distance) {
+string RightOOBErrorMessage(int oob_distance, bool is_write) {
   assert(oob_distance >= 0);
   char expected_str[100];
-  sprintf(expected_str, "located %d bytes to the right", oob_distance);
+  sprintf(expected_str, ASAN_PCRE_DOTALL
+          "buffer-overflow.*%s.*located %d bytes to the right",
+          is_write ? "WRITE" : "READ", oob_distance);
   return string(expected_str);
 }
 
-static string LeftOOBErrorMessage(int oob_distance) {
+string RightOOBWriteMessage(int oob_distance) {
+  return RightOOBErrorMessage(oob_distance, /*is_write*/true);
+}
+
+string RightOOBReadMessage(int oob_distance) {
+  return RightOOBErrorMessage(oob_distance, /*is_write*/false);
+}
+
+string LeftOOBErrorMessage(int oob_distance, bool is_write) {
+  assert(oob_distance > 0);
+  char expected_str[100];
+  sprintf(expected_str, ASAN_PCRE_DOTALL "%s.*located %d bytes to the left",
+          is_write ? "WRITE" : "READ", oob_distance);
+  return string(expected_str);
+}
+
+string LeftOOBWriteMessage(int oob_distance) {
+  return LeftOOBErrorMessage(oob_distance, /*is_write*/true);
+}
+
+string LeftOOBReadMessage(int oob_distance) {
+  return LeftOOBErrorMessage(oob_distance, /*is_write*/false);
+}
+
+string LeftOOBAccessMessage(int oob_distance) {
   assert(oob_distance > 0);
   char expected_str[100];
   sprintf(expected_str, "located %d bytes to the left", oob_distance);
   return string(expected_str);
 }
 
-template<typename T>
-void MemSetOOBTestTemplate(size_t length) {
-  if (length == 0) return;
-  size_t size = Ident(sizeof(T) * length);
-  T *array = Ident((T*)malloc(size));
-  int element = Ident(42);
-  int zero = Ident(0);
-  // memset interval inside array
-  memset(array, element, size);
-  memset(array, element, size - 1);
-  memset(array + length - 1, element, sizeof(T));
-  memset(array, element, 1);
-
-  // memset 0 bytes
-  memset(array - 10, element, zero);
-  memset(array - 1, element, zero);
-  memset(array, element, zero);
-  memset(array + length, 0, zero);
-  memset(array + length + 1, 0, zero);
-
-  // try to memset bytes to the right of array
-  EXPECT_DEATH(memset(array, 0, size + 1),
-               RightOOBErrorMessage(0));
-  EXPECT_DEATH(memset((char*)(array + length) - 1, element, 6),
-               RightOOBErrorMessage(4));
-  EXPECT_DEATH(memset(array + 1, element, size + sizeof(T)),
-               RightOOBErrorMessage(2 * sizeof(T) - 1));
-  // whole interval is to the right
-  EXPECT_DEATH(memset(array + length + 1, 0, 10),
-               RightOOBErrorMessage(sizeof(T)));
-
-  // try to memset bytes to the left of array
-  EXPECT_DEATH(memset((char*)array - 1, element, size),
-               LeftOOBErrorMessage(1));
-  EXPECT_DEATH(memset((char*)array - 5, 0, 6),
-               LeftOOBErrorMessage(5));
-  EXPECT_DEATH(memset(array - 5, element, size + 5 * sizeof(T)),
-               LeftOOBErrorMessage(5 * sizeof(T)));
-  // whole interval is to the left
-  EXPECT_DEATH(memset(array - 2, 0, sizeof(T)),
-               LeftOOBErrorMessage(2 * sizeof(T)));
-
-  // try to memset bytes both to the left & to the right
-  EXPECT_DEATH(memset((char*)array - 2, element, size + 4),
-               LeftOOBErrorMessage(2));
-
-  free(array);
-}
-
-TEST(AddressSanitizer, MemSetOOBTest) {
-  MemSetOOBTestTemplate<char>(100);
-  MemSetOOBTestTemplate<int>(5);
-  MemSetOOBTestTemplate<double>(256);
-  // We can test arrays of structres/classes here, but what for?
-}
-
-// Same test for memcpy and memmove functions
-template <typename T, class M>
-void MemTransferOOBTestTemplate(size_t length) {
-  if (length == 0) return;
-  size_t size = Ident(sizeof(T) * length);
-  T *src = Ident((T*)malloc(size));
-  T *dest = Ident((T*)malloc(size));
-  int zero = Ident(0);
-
-  // valid transfer of bytes between arrays
-  M::transfer(dest, src, size);
-  M::transfer(dest + 1, src, size - sizeof(T));
-  M::transfer(dest, src + length - 1, sizeof(T));
-  M::transfer(dest, src, 1);
-
-  // transfer zero bytes
-  M::transfer(dest - 1, src, 0);
-  M::transfer(dest + length, src, zero);
-  M::transfer(dest, src - 1, zero);
-  M::transfer(dest, src, zero);
-
-  // try to change mem to the right of dest
-  EXPECT_DEATH(M::transfer(dest + 1, src, size),
-               RightOOBErrorMessage(sizeof(T) - 1));
-  EXPECT_DEATH(M::transfer((char*)(dest + length) - 1, src, 5),
-               RightOOBErrorMessage(3));
-
-  // try to change mem to the left of dest
-  EXPECT_DEATH(M::transfer(dest - 2, src, size),
-               LeftOOBErrorMessage(2 * sizeof(T)));
-  EXPECT_DEATH(M::transfer((char*)dest - 3, src, 4),
-               LeftOOBErrorMessage(3));
-
-  // try to access mem to the right of src
-  EXPECT_DEATH(M::transfer(dest, src + 2, size),
-               RightOOBErrorMessage(2 * sizeof(T) - 1));
-  EXPECT_DEATH(M::transfer(dest, (char*)(src + length) - 3, 6),
-               RightOOBErrorMessage(2));
-
-  // try to access mem to the left of src
-  EXPECT_DEATH(M::transfer(dest, src - 1, size),
-               LeftOOBErrorMessage(sizeof(T)));
-  EXPECT_DEATH(M::transfer(dest, (char*)src - 6, 7),
-               LeftOOBErrorMessage(6));
-
-  // Generally we don't need to test cases where both accessing src and writing
-  // to dest address to poisoned memory.
-
-  T *big_src = Ident((T*)malloc(size * 2));
-  T *big_dest = Ident((T*)malloc(size * 2));
-  // try to change mem to both sides of dest
-  EXPECT_DEATH(M::transfer(dest - 1, big_src, size * 2),
-               LeftOOBErrorMessage(sizeof(T)));
-  // try to access mem to both sides of src
-  EXPECT_DEATH(M::transfer(big_dest, src - 2, size * 2),
-               LeftOOBErrorMessage(2 * sizeof(T)));
-
-  free(src);
-  free(dest);
-  free(big_src);
-  free(big_dest);
-}
-
-class MemCpyWrapper {
- public:
-  static void* transfer(void *to, const void *from, size_t size) {
-    return memcpy(to, from, size);
-  }
-};
-TEST(AddressSanitizer, MemCpyOOBTest) {
-  MemTransferOOBTestTemplate<char, MemCpyWrapper>(100);
-  MemTransferOOBTestTemplate<int, MemCpyWrapper>(1024);
-}
-
-class MemMoveWrapper {
- public:
-  static void* transfer(void *to, const void *from, size_t size) {
-    return memmove(to, from, size);
-  }
-};
-TEST(AddressSanitizer, MemMoveOOBTest) {
-  MemTransferOOBTestTemplate<char, MemMoveWrapper>(100);
-  MemTransferOOBTestTemplate<int, MemMoveWrapper>(1024);
-}
-
-// Tests for string functions
-
-// Used for string functions tests
-static char global_string[] = "global";
-static size_t global_string_length = 6;
-
-// Input to a test is a zero-terminated string str with given length
-// Accesses to the bytes to the left and to the right of str
-// are presumed to produce OOB errors
-void StrLenOOBTestTemplate(char *str, size_t length, bool is_global) {
-  // Normal strlen calls
-  EXPECT_EQ(strlen(str), length);
-  if (length > 0) {
-    EXPECT_EQ(length - 1, strlen(str + 1));
-    EXPECT_EQ(0U, strlen(str + length));
-  }
-  // Arg of strlen is not malloced, OOB access
-  if (!is_global) {
-    // We don't insert RedZones to the left of global variables
-    EXPECT_DEATH(Ident(strlen(str - 1)), LeftOOBErrorMessage(1));
-    EXPECT_DEATH(Ident(strlen(str - 5)), LeftOOBErrorMessage(5));
-  }
-  EXPECT_DEATH(Ident(strlen(str + length + 1)), RightOOBErrorMessage(0));
-  // Overwrite terminator
-  str[length] = 'a';
-  // String is not zero-terminated, strlen will lead to OOB access
-  EXPECT_DEATH(Ident(strlen(str)), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(strlen(str + length)), RightOOBErrorMessage(0));
-  // Restore terminator
-  str[length] = 0;
-}
-TEST(AddressSanitizer, StrLenOOBTest) {
-  // Check heap-allocated string
-  size_t length = Ident(10);
-  char *heap_string = Ident((char*)malloc(length + 1));
-  char stack_string[10 + 1];
-  break_optimization(&stack_string);
-  for (size_t i = 0; i < length; i++) {
-    heap_string[i] = 'a';
-    stack_string[i] = 'b';
-  }
-  heap_string[length] = 0;
-  stack_string[length] = 0;
-  StrLenOOBTestTemplate(heap_string, length, false);
-  // TODO(samsonov): Fix expected messages in StrLenOOBTestTemplate to
-  //      make test for stack_string work. Or move it to output tests.
-  // StrLenOOBTestTemplate(stack_string, length, false);
-  StrLenOOBTestTemplate(global_string, global_string_length, true);
-  free(heap_string);
-}
-
-static inline char* MallocAndMemsetString(size_t size, char ch) {
+char* MallocAndMemsetString(size_t size, char ch) {
   char *s = Ident((char*)malloc(size));
   memset(s, ch, size);
   return s;
 }
-static inline char* MallocAndMemsetString(size_t size) {
+
+char* MallocAndMemsetString(size_t size) {
   return MallocAndMemsetString(size, 'z');
 }
 
-#ifndef __APPLE__
-TEST(AddressSanitizer, StrNLenOOBTest) {
-  size_t size = Ident(123);
-  char *str = MallocAndMemsetString(size);
-  // Normal strnlen calls.
-  Ident(strnlen(str - 1, 0));
-  Ident(strnlen(str, size));
-  Ident(strnlen(str + size - 1, 1));
-  str[size - 1] = '\0';
-  Ident(strnlen(str, 2 * size));
-  // Argument points to not allocated memory.
-  EXPECT_DEATH(Ident(strnlen(str - 1, 1)), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(strnlen(str + size, 1)), RightOOBErrorMessage(0));
-  // Overwrite the terminating '\0' and hit unallocated memory.
-  str[size - 1] = 'z';
-  EXPECT_DEATH(Ident(strnlen(str, size + 1)), RightOOBErrorMessage(0));
-  free(str);
-}
-#endif
+#if defined(__linux__) && !defined(ANDROID) && !defined(__ANDROID__)
+#define READ_TEST(READ_N_BYTES)                                          \
+  char *x = new char[10];                                                \
+  int fd = open("/proc/self/stat", O_RDONLY);                            \
+  ASSERT_GT(fd, 0);                                                      \
+  EXPECT_DEATH(READ_N_BYTES,                                             \
+               ASAN_PCRE_DOTALL                                          \
+               "AddressSanitizer: heap-buffer-overflow"                  \
+               ".* is located 0 bytes to the right of 10-byte region");  \
+  close(fd);                                                             \
+  delete [] x;                                                           \
 
-TEST(AddressSanitizer, StrDupOOBTest) {
-  size_t size = Ident(42);
-  char *str = MallocAndMemsetString(size);
-  char *new_str;
-  // Normal strdup calls.
-  str[size - 1] = '\0';
-  new_str = strdup(str);
-  free(new_str);
-  new_str = strdup(str + size - 1);
-  free(new_str);
-  // Argument points to not allocated memory.
-  EXPECT_DEATH(Ident(strdup(str - 1)), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(strdup(str + size)), RightOOBErrorMessage(0));
-  // Overwrite the terminating '\0' and hit unallocated memory.
-  str[size - 1] = 'z';
-  EXPECT_DEATH(Ident(strdup(str)), RightOOBErrorMessage(0));
-  free(str);
+TEST(AddressSanitizer, pread) {
+  READ_TEST(pread(fd, x, 15, 0));
 }
 
-TEST(AddressSanitizer, StrCpyOOBTest) {
-  size_t to_size = Ident(30);
-  size_t from_size = Ident(6);  // less than to_size
-  char *to = Ident((char*)malloc(to_size));
-  char *from = Ident((char*)malloc(from_size));
-  // Normal strcpy calls.
-  strcpy(from, "hello");
-  strcpy(to, from);
-  strcpy(to + to_size - from_size, from);
-  // Length of "from" is too small.
-  EXPECT_DEATH(Ident(strcpy(from, "hello2")), RightOOBErrorMessage(0));
-  // "to" or "from" points to not allocated memory.
-  EXPECT_DEATH(Ident(strcpy(to - 1, from)), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(strcpy(to, from - 1)), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(strcpy(to, from + from_size)), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(strcpy(to + to_size, from)), RightOOBErrorMessage(0));
-  // Overwrite the terminating '\0' character and hit unallocated memory.
-  from[from_size - 1] = '!';
-  EXPECT_DEATH(Ident(strcpy(to, from)), RightOOBErrorMessage(0));
-  free(to);
-  free(from);
+TEST(AddressSanitizer, pread64) {
+  READ_TEST(pread64(fd, x, 15, 0));
 }
 
-TEST(AddressSanitizer, StrNCpyOOBTest) {
-  size_t to_size = Ident(20);
-  size_t from_size = Ident(6);  // less than to_size
-  char *to = Ident((char*)malloc(to_size));
-  // From is a zero-terminated string "hello\0" of length 6
-  char *from = Ident((char*)malloc(from_size));
-  strcpy(from, "hello");
-  // copy 0 bytes
-  strncpy(to, from, 0);
-  strncpy(to - 1, from - 1, 0);
-  // normal strncpy calls
-  strncpy(to, from, from_size);
-  strncpy(to, from, to_size);
-  strncpy(to, from + from_size - 1, to_size);
-  strncpy(to + to_size - 1, from, 1);
-  // One of {to, from} points to not allocated memory
-  EXPECT_DEATH(Ident(strncpy(to, from - 1, from_size)),
-               LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(strncpy(to - 1, from, from_size)),
-               LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(strncpy(to, from + from_size, 1)),
-               RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(strncpy(to + to_size, from, 1)),
-               RightOOBErrorMessage(0));
-  // Length of "to" is too small
-  EXPECT_DEATH(Ident(strncpy(to + to_size - from_size + 1, from, from_size)),
-               RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(strncpy(to + 1, from, to_size)),
-               RightOOBErrorMessage(0));
-  // Overwrite terminator in from
-  from[from_size - 1] = '!';
-  // normal strncpy call
-  strncpy(to, from, from_size);
-  // Length of "from" is too small
-  EXPECT_DEATH(Ident(strncpy(to, from, to_size)),
-               RightOOBErrorMessage(0));
-  free(to);
-  free(from);
+TEST(AddressSanitizer, read) {
+  READ_TEST(read(fd, x, 15));
 }
-
-// Users may have different definitions of "strchr" and "index", so provide
-// function pointer typedefs and overload RunStrChrTest implementation.
-// We can't use macro for RunStrChrTest body here, as this macro would
-// confuse EXPECT_DEATH gtest macro.
-typedef char*(*PointerToStrChr1)(const char*, int);
-typedef char*(*PointerToStrChr2)(char*, int);
-
-USED static void RunStrChrTest(PointerToStrChr1 StrChr) {
-  size_t size = Ident(100);
-  char *str = MallocAndMemsetString(size);
-  str[10] = 'q';
-  str[11] = '\0';
-  EXPECT_EQ(str, StrChr(str, 'z'));
-  EXPECT_EQ(str + 10, StrChr(str, 'q'));
-  EXPECT_EQ(NULL, StrChr(str, 'a'));
-  // StrChr argument points to not allocated memory.
-  EXPECT_DEATH(Ident(StrChr(str - 1, 'z')), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(StrChr(str + size, 'z')), RightOOBErrorMessage(0));
-  // Overwrite the terminator and hit not allocated memory.
-  str[11] = 'z';
-  EXPECT_DEATH(Ident(StrChr(str, 'a')), RightOOBErrorMessage(0));
-  free(str);
-}
-USED static void RunStrChrTest(PointerToStrChr2 StrChr) {
-  size_t size = Ident(100);
-  char *str = MallocAndMemsetString(size);
-  str[10] = 'q';
-  str[11] = '\0';
-  EXPECT_EQ(str, StrChr(str, 'z'));
-  EXPECT_EQ(str + 10, StrChr(str, 'q'));
-  EXPECT_EQ(NULL, StrChr(str, 'a'));
-  // StrChr argument points to not allocated memory.
-  EXPECT_DEATH(Ident(StrChr(str - 1, 'z')), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(StrChr(str + size, 'z')), RightOOBErrorMessage(0));
-  // Overwrite the terminator and hit not allocated memory.
-  str[11] = 'z';
-  EXPECT_DEATH(Ident(StrChr(str, 'a')), RightOOBErrorMessage(0));
-  free(str);
-}
-
-TEST(AddressSanitizer, StrChrAndIndexOOBTest) {
-  RunStrChrTest(&strchr);
-  RunStrChrTest(&index);
-}
-
-TEST(AddressSanitizer, StrCmpAndFriendsLogicTest) {
-  // strcmp
-  EXPECT_EQ(0, strcmp("", ""));
-  EXPECT_EQ(0, strcmp("abcd", "abcd"));
-  EXPECT_GT(0, strcmp("ab", "ac"));
-  EXPECT_GT(0, strcmp("abc", "abcd"));
-  EXPECT_LT(0, strcmp("acc", "abc"));
-  EXPECT_LT(0, strcmp("abcd", "abc"));
-
-  // strncmp
-  EXPECT_EQ(0, strncmp("a", "b", 0));
-  EXPECT_EQ(0, strncmp("abcd", "abcd", 10));
-  EXPECT_EQ(0, strncmp("abcd", "abcef", 3));
-  EXPECT_GT(0, strncmp("abcde", "abcfa", 4));
-  EXPECT_GT(0, strncmp("a", "b", 5));
-  EXPECT_GT(0, strncmp("bc", "bcde", 4));
-  EXPECT_LT(0, strncmp("xyz", "xyy", 10));
-  EXPECT_LT(0, strncmp("baa", "aaa", 1));
-  EXPECT_LT(0, strncmp("zyx", "", 2));
-
-  // strcasecmp
-  EXPECT_EQ(0, strcasecmp("", ""));
-  EXPECT_EQ(0, strcasecmp("zzz", "zzz"));
-  EXPECT_EQ(0, strcasecmp("abCD", "ABcd"));
-  EXPECT_GT(0, strcasecmp("aB", "Ac"));
-  EXPECT_GT(0, strcasecmp("ABC", "ABCd"));
-  EXPECT_LT(0, strcasecmp("acc", "abc"));
-  EXPECT_LT(0, strcasecmp("ABCd", "abc"));
-
-  // strncasecmp
-  EXPECT_EQ(0, strncasecmp("a", "b", 0));
-  EXPECT_EQ(0, strncasecmp("abCD", "ABcd", 10));
-  EXPECT_EQ(0, strncasecmp("abCd", "ABcef", 3));
-  EXPECT_GT(0, strncasecmp("abcde", "ABCfa", 4));
-  EXPECT_GT(0, strncasecmp("a", "B", 5));
-  EXPECT_GT(0, strncasecmp("bc", "BCde", 4));
-  EXPECT_LT(0, strncasecmp("xyz", "xyy", 10));
-  EXPECT_LT(0, strncasecmp("Baa", "aaa", 1));
-  EXPECT_LT(0, strncasecmp("zyx", "", 2));
-
-  // memcmp
-  EXPECT_EQ(0, memcmp("a", "b", 0));
-  EXPECT_EQ(0, memcmp("ab\0c", "ab\0c", 4));
-  EXPECT_GT(0, memcmp("\0ab", "\0ac", 3));
-  EXPECT_GT(0, memcmp("abb\0", "abba", 4));
-  EXPECT_LT(0, memcmp("ab\0cd", "ab\0c\0", 5));
-  EXPECT_LT(0, memcmp("zza", "zyx", 3));
-}
-
-typedef int(*PointerToStrCmp)(const char*, const char*);
-void RunStrCmpTest(PointerToStrCmp StrCmp) {
-  size_t size = Ident(100);
-  char *s1 = MallocAndMemsetString(size);
-  char *s2 = MallocAndMemsetString(size);
-  s1[size - 1] = '\0';
-  s2[size - 1] = '\0';
-  // Normal StrCmp calls
-  Ident(StrCmp(s1, s2));
-  Ident(StrCmp(s1, s2 + size - 1));
-  Ident(StrCmp(s1 + size - 1, s2 + size - 1));
-  s1[size - 1] = 'z';
-  s2[size - 1] = 'x';
-  Ident(StrCmp(s1, s2));
-  // One of arguments points to not allocated memory.
-  EXPECT_DEATH(Ident(StrCmp)(s1 - 1, s2), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(StrCmp)(s1, s2 - 1), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(StrCmp)(s1 + size, s2), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(StrCmp)(s1, s2 + size), RightOOBErrorMessage(0));
-  // Hit unallocated memory and die.
-  s2[size - 1] = 'z';
-  EXPECT_DEATH(Ident(StrCmp)(s1, s1), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(StrCmp)(s1 + size - 1, s2), RightOOBErrorMessage(0));
-  free(s1);
-  free(s2);
-}
-
-TEST(AddressSanitizer, StrCmpOOBTest) {
-  RunStrCmpTest(&strcmp);
-}
-
-TEST(AddressSanitizer, StrCaseCmpOOBTest) {
-  RunStrCmpTest(&strcasecmp);
-}
-
-typedef int(*PointerToStrNCmp)(const char*, const char*, size_t);
-void RunStrNCmpTest(PointerToStrNCmp StrNCmp) {
-  size_t size = Ident(100);
-  char *s1 = MallocAndMemsetString(size);
-  char *s2 = MallocAndMemsetString(size);
-  s1[size - 1] = '\0';
-  s2[size - 1] = '\0';
-  // Normal StrNCmp calls
-  Ident(StrNCmp(s1, s2, size + 2));
-  s1[size - 1] = 'z';
-  s2[size - 1] = 'x';
-  Ident(StrNCmp(s1 + size - 2, s2 + size - 2, size));
-  s2[size - 1] = 'z';
-  Ident(StrNCmp(s1 - 1, s2 - 1, 0));
-  Ident(StrNCmp(s1 + size - 1, s2 + size - 1, 1));
-  // One of arguments points to not allocated memory.
-  EXPECT_DEATH(Ident(StrNCmp)(s1 - 1, s2, 1), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(StrNCmp)(s1, s2 - 1, 1), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(StrNCmp)(s1 + size, s2, 1), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(StrNCmp)(s1, s2 + size, 1), RightOOBErrorMessage(0));
-  // Hit unallocated memory and die.
-  EXPECT_DEATH(Ident(StrNCmp)(s1 + 1, s2 + 1, size), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(StrNCmp)(s1 + size - 1, s2, 2), RightOOBErrorMessage(0));
-  free(s1);
-  free(s2);
-}
-
-TEST(AddressSanitizer, StrNCmpOOBTest) {
-  RunStrNCmpTest(&strncmp);
-}
-
-TEST(AddressSanitizer, StrNCaseCmpOOBTest) {
-  RunStrNCmpTest(&strncasecmp);
-}
-
-TEST(AddressSanitizer, MemCmpOOBTest) {
-  size_t size = Ident(100);
-  char *s1 = MallocAndMemsetString(size);
-  char *s2 = MallocAndMemsetString(size);
-  // Normal memcmp calls.
-  Ident(memcmp(s1, s2, size));
-  Ident(memcmp(s1 + size - 1, s2 + size - 1, 1));
-  Ident(memcmp(s1 - 1, s2 - 1, 0));
-  // One of arguments points to not allocated memory.
-  EXPECT_DEATH(Ident(memcmp)(s1 - 1, s2, 1), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(memcmp)(s1, s2 - 1, 1), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(Ident(memcmp)(s1 + size, s2, 1), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(memcmp)(s1, s2 + size, 1), RightOOBErrorMessage(0));
-  // Hit unallocated memory and die.
-  EXPECT_DEATH(Ident(memcmp)(s1 + 1, s2 + 1, size), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Ident(memcmp)(s1 + size - 1, s2, 2), RightOOBErrorMessage(0));
-  // Zero bytes are not terminators and don't prevent from OOB.
-  s1[size - 1] = '\0';
-  s2[size - 1] = '\0';
-  EXPECT_DEATH(Ident(memcmp)(s1, s2, size + 1), RightOOBErrorMessage(0));
-  free(s1);
-  free(s2);
-}
-
-TEST(AddressSanitizer, StrCatOOBTest) {
-  size_t to_size = Ident(100);
-  char *to = MallocAndMemsetString(to_size);
-  to[0] = '\0';
-  size_t from_size = Ident(20);
-  char *from = MallocAndMemsetString(from_size);
-  from[from_size - 1] = '\0';
-  // Normal strcat calls.
-  strcat(to, from);
-  strcat(to, from);
-  strcat(to + from_size, from + from_size - 2);
-  // Passing an invalid pointer is an error even when concatenating an empty
-  // string.
-  EXPECT_DEATH(strcat(to - 1, from + from_size - 1), LeftOOBErrorMessage(1));
-  // One of arguments points to not allocated memory.
-  EXPECT_DEATH(strcat(to - 1, from), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(strcat(to, from - 1), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(strcat(to + to_size, from), RightOOBErrorMessage(0));
-  EXPECT_DEATH(strcat(to, from + from_size), RightOOBErrorMessage(0));
-
-  // "from" is not zero-terminated.
-  from[from_size - 1] = 'z';
-  EXPECT_DEATH(strcat(to, from), RightOOBErrorMessage(0));
-  from[from_size - 1] = '\0';
-  // "to" is not zero-terminated.
-  memset(to, 'z', to_size);
-  EXPECT_DEATH(strcat(to, from), RightOOBErrorMessage(0));
-  // "to" is too short to fit "from".
-  to[to_size - from_size + 1] = '\0';
-  EXPECT_DEATH(strcat(to, from), RightOOBErrorMessage(0));
-  // length of "to" is just enough.
-  strcat(to, from + 1);
-
-  free(to);
-  free(from);
-}
-
-TEST(AddressSanitizer, StrNCatOOBTest) {
-  size_t to_size = Ident(100);
-  char *to = MallocAndMemsetString(to_size);
-  to[0] = '\0';
-  size_t from_size = Ident(20);
-  char *from = MallocAndMemsetString(from_size);
-  // Normal strncat calls.
-  strncat(to, from, 0);
-  strncat(to, from, from_size);
-  from[from_size - 1] = '\0';
-  strncat(to, from, 2 * from_size);
-  // Catenating empty string with an invalid string is still an error.
-  EXPECT_DEATH(strncat(to - 1, from, 0), LeftOOBErrorMessage(1));
-  strncat(to, from + from_size - 1, 10);
-  // One of arguments points to not allocated memory.
-  EXPECT_DEATH(strncat(to - 1, from, 2), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(strncat(to, from - 1, 2), LeftOOBErrorMessage(1));
-  EXPECT_DEATH(strncat(to + to_size, from, 2), RightOOBErrorMessage(0));
-  EXPECT_DEATH(strncat(to, from + from_size, 2), RightOOBErrorMessage(0));
-
-  memset(from, 'z', from_size);
-  memset(to, 'z', to_size);
-  to[0] = '\0';
-  // "from" is too short.
-  EXPECT_DEATH(strncat(to, from, from_size + 1), RightOOBErrorMessage(0));
-  // "to" is not zero-terminated.
-  EXPECT_DEATH(strncat(to + 1, from, 1), RightOOBErrorMessage(0));
-  // "to" is too short to fit "from".
-  to[0] = 'z';
-  to[to_size - from_size + 1] = '\0';
-  EXPECT_DEATH(strncat(to, from, from_size - 1), RightOOBErrorMessage(0));
-  // "to" is just enough.
-  strncat(to, from, from_size - 2);
-
-  free(to);
-  free(from);
-}
-
-static string OverlapErrorMessage(const string &func) {
-  return func + "-param-overlap";
-}
-
-TEST(AddressSanitizer, StrArgsOverlapTest) {
-  size_t size = Ident(100);
-  char *str = Ident((char*)malloc(size));
-
-// Do not check memcpy() on OS X 10.7 and later, where it actually aliases
-// memmove().
-#if !defined(__APPLE__) || !defined(MAC_OS_X_VERSION_10_7) || \
-    (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7)
-  // Check "memcpy". Use Ident() to avoid inlining.
-  memset(str, 'z', size);
-  Ident(memcpy)(str + 1, str + 11, 10);
-  Ident(memcpy)(str, str, 0);
-  EXPECT_DEATH(Ident(memcpy)(str, str + 14, 15), OverlapErrorMessage("memcpy"));
-  EXPECT_DEATH(Ident(memcpy)(str + 14, str, 15), OverlapErrorMessage("memcpy"));
-#endif
-
-  // We do not treat memcpy with to==from as a bug.
-  // See http://llvm.org/bugs/show_bug.cgi?id=11763.
-  // EXPECT_DEATH(Ident(memcpy)(str + 20, str + 20, 1),
-  //              OverlapErrorMessage("memcpy"));
-
-  // Check "strcpy".
-  memset(str, 'z', size);
-  str[9] = '\0';
-  strcpy(str + 10, str);
-  EXPECT_DEATH(strcpy(str + 9, str), OverlapErrorMessage("strcpy"));
-  EXPECT_DEATH(strcpy(str, str + 4), OverlapErrorMessage("strcpy"));
-  strcpy(str, str + 5);
-
-  // Check "strncpy".
-  memset(str, 'z', size);
-  strncpy(str, str + 10, 10);
-  EXPECT_DEATH(strncpy(str, str + 9, 10), OverlapErrorMessage("strncpy"));
-  EXPECT_DEATH(strncpy(str + 9, str, 10), OverlapErrorMessage("strncpy"));
-  str[10] = '\0';
-  strncpy(str + 11, str, 20);
-  EXPECT_DEATH(strncpy(str + 10, str, 20), OverlapErrorMessage("strncpy"));
-
-  // Check "strcat".
-  memset(str, 'z', size);
-  str[10] = '\0';
-  str[20] = '\0';
-  strcat(str, str + 10);
-  EXPECT_DEATH(strcat(str, str + 11), OverlapErrorMessage("strcat"));
-  str[10] = '\0';
-  strcat(str + 11, str);
-  EXPECT_DEATH(strcat(str, str + 9), OverlapErrorMessage("strcat"));
-  EXPECT_DEATH(strcat(str + 9, str), OverlapErrorMessage("strcat"));
-  EXPECT_DEATH(strcat(str + 10, str), OverlapErrorMessage("strcat"));
-
-  // Check "strncat".
-  memset(str, 'z', size);
-  str[10] = '\0';
-  strncat(str, str + 10, 10);  // from is empty
-  EXPECT_DEATH(strncat(str, str + 11, 10), OverlapErrorMessage("strncat"));
-  str[10] = '\0';
-  str[20] = '\0';
-  strncat(str + 5, str, 5);
-  str[10] = '\0';
-  EXPECT_DEATH(strncat(str + 5, str, 6), OverlapErrorMessage("strncat"));
-  EXPECT_DEATH(strncat(str, str + 9, 10), OverlapErrorMessage("strncat"));
-
-  free(str);
-}
-
-void CallAtoi(const char *nptr) {
-  Ident(atoi(nptr));
-}
-void CallAtol(const char *nptr) {
-  Ident(atol(nptr));
-}
-void CallAtoll(const char *nptr) {
-  Ident(atoll(nptr));
-}
-typedef void(*PointerToCallAtoi)(const char*);
-
-void RunAtoiOOBTest(PointerToCallAtoi Atoi) {
-  char *array = MallocAndMemsetString(10, '1');
-  // Invalid pointer to the string.
-  EXPECT_DEATH(Atoi(array + 11), RightOOBErrorMessage(1));
-  EXPECT_DEATH(Atoi(array - 1), LeftOOBErrorMessage(1));
-  // Die if a buffer doesn't have terminating NULL.
-  EXPECT_DEATH(Atoi(array), RightOOBErrorMessage(0));
-  // Make last symbol a terminating NULL or other non-digit.
-  array[9] = '\0';
-  Atoi(array);
-  array[9] = 'a';
-  Atoi(array);
-  Atoi(array + 9);
-  // Sometimes we need to detect overflow if no digits are found.
-  memset(array, ' ', 10);
-  EXPECT_DEATH(Atoi(array), RightOOBErrorMessage(0));
-  array[9] = '-';
-  EXPECT_DEATH(Atoi(array), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Atoi(array + 9), RightOOBErrorMessage(0));
-  array[8] = '-';
-  Atoi(array);
-  delete array;
-}
-
-TEST(AddressSanitizer, AtoiAndFriendsOOBTest) {
-  RunAtoiOOBTest(&CallAtoi);
-  RunAtoiOOBTest(&CallAtol);
-  RunAtoiOOBTest(&CallAtoll);
-}
-
-void CallStrtol(const char *nptr, char **endptr, int base) {
-  Ident(strtol(nptr, endptr, base));
-}
-void CallStrtoll(const char *nptr, char **endptr, int base) {
-  Ident(strtoll(nptr, endptr, base));
-}
-typedef void(*PointerToCallStrtol)(const char*, char**, int);
-
-void RunStrtolOOBTest(PointerToCallStrtol Strtol) {
-  char *array = MallocAndMemsetString(3);
-  char *endptr = NULL;
-  array[0] = '1';
-  array[1] = '2';
-  array[2] = '3';
-  // Invalid pointer to the string.
-  EXPECT_DEATH(Strtol(array + 3, NULL, 0), RightOOBErrorMessage(0));
-  EXPECT_DEATH(Strtol(array - 1, NULL, 0), LeftOOBErrorMessage(1));
-  // Buffer overflow if there is no terminating null (depends on base).
-  Strtol(array, &endptr, 3);
-  EXPECT_EQ(array + 2, endptr);
-  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBErrorMessage(0));
-  array[2] = 'z';
-  Strtol(array, &endptr, 35);
-  EXPECT_EQ(array + 2, endptr);
-  EXPECT_DEATH(Strtol(array, NULL, 36), RightOOBErrorMessage(0));
-  // Add terminating zero to get rid of overflow.
-  array[2] = '\0';
-  Strtol(array, NULL, 36);
-  // Don't check for overflow if base is invalid.
-  Strtol(array - 1, NULL, -1);
-  Strtol(array + 3, NULL, 1);
-  // Sometimes we need to detect overflow if no digits are found.
-  array[0] = array[1] = array[2] = ' ';
-  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBErrorMessage(0));
-  array[2] = '+';
-  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBErrorMessage(0));
-  array[2] = '-';
-  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBErrorMessage(0));
-  array[1] = '+';
-  Strtol(array, NULL, 0);
-  array[1] = array[2] = 'z';
-  Strtol(array, &endptr, 0);
-  EXPECT_EQ(array, endptr);
-  Strtol(array + 2, NULL, 0);
-  EXPECT_EQ(array, endptr);
-  delete array;
-}
-
-TEST(AddressSanitizer, StrtollOOBTest) {
-  RunStrtolOOBTest(&CallStrtoll);
-}
-TEST(AddressSanitizer, StrtolOOBTest) {
-  RunStrtolOOBTest(&CallStrtol);
-}
-
-// At the moment we instrument memcpy/memove/memset calls at compile time so we
-// can't handle OOB error if these functions are called by pointer, see disabled
-// MemIntrinsicCallByPointerTest below
-typedef void*(*PointerToMemTransfer)(void*, const void*, size_t);
-typedef void*(*PointerToMemSet)(void*, int, size_t);
-
-void CallMemSetByPointer(PointerToMemSet MemSet) {
-  size_t size = Ident(100);
-  char *array = Ident((char*)malloc(size));
-  EXPECT_DEATH(MemSet(array, 0, 101), RightOOBErrorMessage(0));
-  free(array);
-}
-
-void CallMemTransferByPointer(PointerToMemTransfer MemTransfer) {
-  size_t size = Ident(100);
-  char *src = Ident((char*)malloc(size));
-  char *dst = Ident((char*)malloc(size));
-  EXPECT_DEATH(MemTransfer(dst, src, 101), RightOOBErrorMessage(0));
-  free(src);
-  free(dst);
-}
-
-TEST(AddressSanitizer, DISABLED_MemIntrinsicCallByPointerTest) {
-  CallMemSetByPointer(&memset);
-  CallMemTransferByPointer(&memcpy);
-  CallMemTransferByPointer(&memmove);
-}
+#endif  // defined(__linux__) && !defined(ANDROID) && !defined(__ANDROID__)
 
 // This test case fails
 // Clang optimizes memcpy/memset calls which lead to unaligned access
 TEST(AddressSanitizer, DISABLED_MemIntrinsicUnalignedAccessTest) {
   int size = Ident(4096);
   char *s = Ident((char*)malloc(size));
-  EXPECT_DEATH(memset(s + size - 1, 0, 2), RightOOBErrorMessage(0));
+  EXPECT_DEATH(memset(s + size - 1, 0, 2), RightOOBWriteMessage(0));
   free(s);
 }
 
@@ -1623,28 +848,30 @@
                "malloc_fff.*malloc_eee.*malloc_ddd");
 }
 
-static void TryToSetThreadName(const char *name) {
-#ifdef __linux__
-  prctl(PR_SET_NAME, (unsigned long)name, 0, 0, 0);
+static bool TryToSetThreadName(const char *name) {
+#if defined(__linux__) && defined(PR_SET_NAME)
+  return 0 == prctl(PR_SET_NAME, (unsigned long)name, 0, 0, 0);
+#else
+  return false;
 #endif
 }
 
 void *ThreadedTestAlloc(void *a) {
-  TryToSetThreadName("AllocThr");
+  EXPECT_EQ(true, TryToSetThreadName("AllocThr"));
   int **p = (int**)a;
   *p = new int;
   return 0;
 }
 
 void *ThreadedTestFree(void *a) {
-  TryToSetThreadName("FreeThr");
+  EXPECT_EQ(true, TryToSetThreadName("FreeThr"));
   int **p = (int**)a;
   delete *p;
   return 0;
 }
 
 void *ThreadedTestUse(void *a) {
-  TryToSetThreadName("UseThr");
+  EXPECT_EQ(true, TryToSetThreadName("UseThr"));
   int **p = (int**)a;
   **p = 1;
   return 0;
@@ -1669,20 +896,29 @@
                ".*Thread T.*created");
 }
 
-#ifdef __linux__
-TEST(AddressSanitizer, ThreadNamesTest) {
-  // ThreadedTestSpawn();
+void *ThreadedTestFunc(void *unused) {
+  // Check if prctl(PR_SET_NAME) is supported. Return if not.
+  if (!TryToSetThreadName("TestFunc"))
+    return 0;
   EXPECT_DEATH(ThreadedTestSpawn(),
                ASAN_PCRE_DOTALL
-               "WRITE .*thread T3 .UseThr."
-               ".*freed by thread T2 .FreeThr. here:"
-               ".*previously allocated by thread T1 .AllocThr. here:"
-               ".*Thread T3 .UseThr. created by T0 here:"
-               ".*Thread T2 .FreeThr. created by T0 here:"
-               ".*Thread T1 .AllocThr. created by T0 here:"
+               "WRITE .*thread T. .UseThr."
+               ".*freed by thread T. .FreeThr. here:"
+               ".*previously allocated by thread T. .AllocThr. here:"
+               ".*Thread T. .UseThr. created by T.*TestFunc"
+               ".*Thread T. .FreeThr. created by T"
+               ".*Thread T. .AllocThr. created by T"
                "");
+  return 0;
 }
-#endif
+
+TEST(AddressSanitizer, ThreadNamesTest) {
+  // Run ThreadedTestFunc in a separate thread because it tries to set a
+  // thread name and we don't want to change the main thread's name.
+  pthread_t t;
+  PTHREAD_CREATE(&t, 0, ThreadedTestFunc, 0);
+  PTHREAD_JOIN(t, 0);
+}
 
 #if ASAN_NEEDS_SEGV
 TEST(AddressSanitizer, ShadowGapTest) {
@@ -1713,11 +949,9 @@
 }
 
 // Currently we create and poison redzone at right of global variables.
-char glob5[5];
 static char static110[110];
 const char ConstGlob[7] = {1, 2, 3, 4, 5, 6, 7};
 static const char StaticConstGlob[3] = {9, 8, 7};
-extern int GlobalsTest(int x);
 
 TEST(AddressSanitizer, GlobalTest) {
   static char func_static15[15];
@@ -1898,6 +1132,28 @@
   Ident(NoAddressSafety)();
 }
 
+// It doesn't work on Android, as calls to new/delete go through malloc/free.
+#if !defined(ANDROID) && !defined(__ANDROID__)
+static string MismatchStr(const string &str) {
+  return string("AddressSanitizer: alloc-dealloc-mismatch \\(") + str;
+}
+
+TEST(AddressSanitizer, AllocDeallocMismatch) {
+  EXPECT_DEATH(free(Ident(new int)),
+               MismatchStr("operator new vs free"));
+  EXPECT_DEATH(free(Ident(new int[2])),
+               MismatchStr("operator new \\[\\] vs free"));
+  EXPECT_DEATH(delete (Ident(new int[2])),
+               MismatchStr("operator new \\[\\] vs operator delete"));
+  EXPECT_DEATH(delete (Ident((int*)malloc(2 * sizeof(int)))),
+               MismatchStr("malloc vs operator delete"));
+  EXPECT_DEATH(delete [] (Ident(new int)),
+               MismatchStr("operator new vs operator delete \\[\\]"));
+  EXPECT_DEATH(delete [] (Ident((int*)malloc(2 * sizeof(int)))),
+               MismatchStr("malloc vs operator delete \\[\\]"));
+}
+#endif
+
 // ------------------ demo tests; run each one-by-one -------------
 // e.g. --gtest_filter=*DemoOOBLeftHigh --gtest_also_run_disabled_tests
 TEST(AddressSanitizer, DISABLED_DemoThreadedTest) {
@@ -1934,22 +1190,6 @@
   uaf_test<U1>(kLargeMalloc, 0);
 }
 
-TEST(AddressSanitizer, DISABLED_DemoOOBLeftLow) {
-  oob_test<U1>(10, -1);
-}
-
-TEST(AddressSanitizer, DISABLED_DemoOOBLeftHigh) {
-  oob_test<U1>(kLargeMalloc, -1);
-}
-
-TEST(AddressSanitizer, DISABLED_DemoOOBRightLow) {
-  oob_test<U1>(10, 10);
-}
-
-TEST(AddressSanitizer, DISABLED_DemoOOBRightHigh) {
-  oob_test<U1>(kLargeMalloc, kLargeMalloc);
-}
-
 TEST(AddressSanitizer, DISABLED_DemoOOM) {
   size_t size = SANITIZER_WORDSIZE == 64 ? (size_t)(1ULL << 40) : (0xf0000000);
   printf("%p\n", malloc(size));
@@ -1997,220 +1237,6 @@
   delete [] Ident(x);
 }
 
-#ifdef __APPLE__
-#include "asan_mac_test.h"
-TEST(AddressSanitizerMac, CFAllocatorDefaultDoubleFree) {
-  EXPECT_DEATH(
-      CFAllocatorDefaultDoubleFree(NULL),
-      "attempting double-free");
-}
-
-void CFAllocator_DoubleFreeOnPthread() {
-  pthread_t child;
-  PTHREAD_CREATE(&child, NULL, CFAllocatorDefaultDoubleFree, NULL);
-  PTHREAD_JOIN(child, NULL);  // Shouldn't be reached.
-}
-
-TEST(AddressSanitizerMac, CFAllocatorDefaultDoubleFree_ChildPhread) {
-  EXPECT_DEATH(CFAllocator_DoubleFreeOnPthread(), "attempting double-free");
-}
-
-namespace {
-
-void *GLOB;
-
-void *CFAllocatorAllocateToGlob(void *unused) {
-  GLOB = CFAllocatorAllocate(NULL, 100, /*hint*/0);
-  return NULL;
-}
-
-void *CFAllocatorDeallocateFromGlob(void *unused) {
-  char *p = (char*)GLOB;
-  p[100] = 'A';  // ASan should report an error here.
-  CFAllocatorDeallocate(NULL, GLOB);
-  return NULL;
-}
-
-void CFAllocator_PassMemoryToAnotherThread() {
-  pthread_t th1, th2;
-  PTHREAD_CREATE(&th1, NULL, CFAllocatorAllocateToGlob, NULL);
-  PTHREAD_JOIN(th1, NULL);
-  PTHREAD_CREATE(&th2, NULL, CFAllocatorDeallocateFromGlob, NULL);
-  PTHREAD_JOIN(th2, NULL);
-}
-
-TEST(AddressSanitizerMac, CFAllocator_PassMemoryToAnotherThread) {
-  EXPECT_DEATH(CFAllocator_PassMemoryToAnotherThread(),
-               "heap-buffer-overflow");
-}
-
-}  // namespace
-
-// TODO(glider): figure out whether we still need these tests. Is it correct
-// to intercept the non-default CFAllocators?
-TEST(AddressSanitizerMac, DISABLED_CFAllocatorSystemDefaultDoubleFree) {
-  EXPECT_DEATH(
-      CFAllocatorSystemDefaultDoubleFree(),
-      "attempting double-free");
-}
-
-// We're intercepting malloc, so kCFAllocatorMalloc is routed to ASan.
-TEST(AddressSanitizerMac, CFAllocatorMallocDoubleFree) {
-  EXPECT_DEATH(CFAllocatorMallocDoubleFree(), "attempting double-free");
-}
-
-TEST(AddressSanitizerMac, DISABLED_CFAllocatorMallocZoneDoubleFree) {
-  EXPECT_DEATH(CFAllocatorMallocZoneDoubleFree(), "attempting double-free");
-}
-
-TEST(AddressSanitizerMac, GCDDispatchAsync) {
-  // Make sure the whole ASan report is printed, i.e. that we don't die
-  // on a CHECK.
-  EXPECT_DEATH(TestGCDDispatchAsync(), "Shadow byte and word");
-}
-
-TEST(AddressSanitizerMac, GCDDispatchSync) {
-  // Make sure the whole ASan report is printed, i.e. that we don't die
-  // on a CHECK.
-  EXPECT_DEATH(TestGCDDispatchSync(), "Shadow byte and word");
-}
-
-
-TEST(AddressSanitizerMac, GCDReuseWqthreadsAsync) {
-  // Make sure the whole ASan report is printed, i.e. that we don't die
-  // on a CHECK.
-  EXPECT_DEATH(TestGCDReuseWqthreadsAsync(), "Shadow byte and word");
-}
-
-TEST(AddressSanitizerMac, GCDReuseWqthreadsSync) {
-  // Make sure the whole ASan report is printed, i.e. that we don't die
-  // on a CHECK.
-  EXPECT_DEATH(TestGCDReuseWqthreadsSync(), "Shadow byte and word");
-}
-
-TEST(AddressSanitizerMac, GCDDispatchAfter) {
-  // Make sure the whole ASan report is printed, i.e. that we don't die
-  // on a CHECK.
-  EXPECT_DEATH(TestGCDDispatchAfter(), "Shadow byte and word");
-}
-
-TEST(AddressSanitizerMac, GCDSourceEvent) {
-  // Make sure the whole ASan report is printed, i.e. that we don't die
-  // on a CHECK.
-  EXPECT_DEATH(TestGCDSourceEvent(), "Shadow byte and word");
-}
-
-TEST(AddressSanitizerMac, GCDSourceCancel) {
-  // Make sure the whole ASan report is printed, i.e. that we don't die
-  // on a CHECK.
-  EXPECT_DEATH(TestGCDSourceCancel(), "Shadow byte and word");
-}
-
-TEST(AddressSanitizerMac, GCDGroupAsync) {
-  // Make sure the whole ASan report is printed, i.e. that we don't die
-  // on a CHECK.
-  EXPECT_DEATH(TestGCDGroupAsync(), "Shadow byte and word");
-}
-
-void *MallocIntrospectionLockWorker(void *_) {
-  const int kNumPointers = 100;
-  int i;
-  void *pointers[kNumPointers];
-  for (i = 0; i < kNumPointers; i++) {
-    pointers[i] = malloc(i + 1);
-  }
-  for (i = 0; i < kNumPointers; i++) {
-    free(pointers[i]);
-  }
-
-  return NULL;
-}
-
-void *MallocIntrospectionLockForker(void *_) {
-  pid_t result = fork();
-  if (result == -1) {
-    perror("fork");
-  }
-  assert(result != -1);
-  if (result == 0) {
-    // Call malloc in the child process to make sure we won't deadlock.
-    void *ptr = malloc(42);
-    free(ptr);
-    exit(0);
-  } else {
-    // Return in the parent process.
-    return NULL;
-  }
-}
-
-TEST(AddressSanitizerMac, MallocIntrospectionLock) {
-  // Incorrect implementation of force_lock and force_unlock in our malloc zone
-  // will cause forked processes to deadlock.
-  // TODO(glider): need to detect that none of the child processes deadlocked.
-  const int kNumWorkers = 5, kNumIterations = 100;
-  int i, iter;
-  for (iter = 0; iter < kNumIterations; iter++) {
-    pthread_t workers[kNumWorkers], forker;
-    for (i = 0; i < kNumWorkers; i++) {
-      PTHREAD_CREATE(&workers[i], 0, MallocIntrospectionLockWorker, 0);
-    }
-    PTHREAD_CREATE(&forker, 0, MallocIntrospectionLockForker, 0);
-    for (i = 0; i < kNumWorkers; i++) {
-      PTHREAD_JOIN(workers[i], 0);
-    }
-    PTHREAD_JOIN(forker, 0);
-  }
-}
-
-void *TSDAllocWorker(void *test_key) {
-  if (test_key) {
-    void *mem = malloc(10);
-    pthread_setspecific(*(pthread_key_t*)test_key, mem);
-  }
-  return NULL;
-}
-
-TEST(AddressSanitizerMac, DISABLED_TSDWorkqueueTest) {
-  pthread_t th;
-  pthread_key_t test_key;
-  pthread_key_create(&test_key, CallFreeOnWorkqueue);
-  PTHREAD_CREATE(&th, NULL, TSDAllocWorker, &test_key);
-  PTHREAD_JOIN(th, NULL);
-  pthread_key_delete(test_key);
-}
-
-// Test that CFStringCreateCopy does not copy constant strings.
-TEST(AddressSanitizerMac, CFStringCreateCopy) {
-  CFStringRef str = CFSTR("Hello world!\n");
-  CFStringRef str2 = CFStringCreateCopy(0, str);
-  EXPECT_EQ(str, str2);
-}
-
-TEST(AddressSanitizerMac, NSObjectOOB) {
-  // Make sure that our allocators are used for NSObjects.
-  EXPECT_DEATH(TestOOBNSObjects(), "heap-buffer-overflow");
-}
-
-// Make sure that correct pointer is passed to free() when deallocating a
-// NSURL object.
-// See http://code.google.com/p/address-sanitizer/issues/detail?id=70.
-TEST(AddressSanitizerMac, NSURLDeallocation) {
-  TestNSURLDeallocation();
-}
-
-// See http://code.google.com/p/address-sanitizer/issues/detail?id=109.
-TEST(AddressSanitizerMac, Mstats) {
-  malloc_statistics_t stats1, stats2;
-  malloc_zone_statistics(/*all zones*/NULL, &stats1);
-  const int kMallocSize = 100000;
-  void *alloc = Ident(malloc(kMallocSize));
-  malloc_zone_statistics(/*all zones*/NULL, &stats2);
-  EXPECT_GT(stats2.blocks_in_use, stats1.blocks_in_use);
-  EXPECT_GE(stats2.size_in_use - stats1.size_in_use, kMallocSize);
-  free(alloc);
-  // Even the default OSX allocator may not change the stats after free().
-}
-#endif  // __APPLE__
 
 // Test that instrumentation of stack allocations takes into account
 // AllocSize of a type, and not its StoreSize (16 vs 10 bytes for long double).
diff --git a/lib/asan/tests/asan_test_utils.h b/lib/asan/tests/asan_test_utils.h
index f810438..4037731 100644
--- a/lib/asan/tests/asan_test_utils.h
+++ b/lib/asan/tests/asan_test_utils.h
@@ -20,56 +20,92 @@
 # undef INCLUDED_FROM_ASAN_TEST_UTILS_H
 #endif
 
-#if defined(_WIN32)
-typedef unsigned __int8  uint8_t;
-typedef unsigned __int16 uint16_t;
-typedef unsigned __int32 uint32_t;
-typedef unsigned __int64 uint64_t;
-typedef __int8           int8_t;
-typedef __int16          int16_t;
-typedef __int32          int32_t;
-typedef __int64          int64_t;
-# define NOINLINE __declspec(noinline)
-# define USED
-#else  // defined(_WIN32)
-# define NOINLINE __attribute__((noinline))
-# define USED __attribute__((used))
-#endif  // defined(_WIN32)
+#include "sanitizer_test_utils.h"
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <setjmp.h>
+#include <assert.h>
+#include <algorithm>
+#include <sys/mman.h>
 
-#if !defined(__has_feature)
-#define __has_feature(x) 0
+#ifdef __linux__
+# include <sys/prctl.h>
+# include <sys/types.h>
+# include <sys/stat.h>
+# include <fcntl.h>
+#include <unistd.h>
 #endif
 
-#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
-# define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
-    __attribute__((no_address_safety_analysis))
-#else
-# define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
+#if defined(__i386__) || defined(__x86_64__)
+#include <emmintrin.h>
 #endif
 
-#if __LP64__ || defined(_WIN64)
-#  define SANITIZER_WORDSIZE 64
-#else
-#  define SANITIZER_WORDSIZE 32
+#ifndef __APPLE__
+#include <malloc.h>
 #endif
 
-// Make the compiler thinks that something is going on there.
-inline void break_optimization(void *arg) {
-  __asm__ __volatile__("" : : "r" (arg) : "memory");
-}
-
-// This function returns its parameter but in such a way that compiler
-// can not prove it.
-template<class T>
-NOINLINE
-static T Ident(T t) {
-  T ret = t;
-  break_optimization(&ret);
-  return ret;
-}
-
 // Check that pthread_create/pthread_join return success.
 #define PTHREAD_CREATE(a, b, c, d) ASSERT_EQ(0, pthread_create(a, b, c, d))
 #define PTHREAD_JOIN(a, b) ASSERT_EQ(0, pthread_join(a, b))
 
+#if ASAN_HAS_EXCEPTIONS
+# define ASAN_THROW(x) throw (x)
+#else
+# define ASAN_THROW(x)
+#endif
+
+typedef uint8_t   U1;
+typedef uint16_t  U2;
+typedef uint32_t  U4;
+typedef uint64_t  U8;
+
+static const int kPageSize = 4096;
+
+const size_t kLargeMalloc = 1 << 24;
+
+extern void free_aaa(void *p);
+extern void *malloc_aaa(size_t size);
+
+template<typename T>
+NOINLINE void asan_write(T *a) {
+  *a = 0;
+}
+
+string RightOOBErrorMessage(int oob_distance, bool is_write);
+string RightOOBWriteMessage(int oob_distance);
+string RightOOBReadMessage(int oob_distance);
+string LeftOOBErrorMessage(int oob_distance, bool is_write);
+string LeftOOBWriteMessage(int oob_distance);
+string LeftOOBReadMessage(int oob_distance);
+string LeftOOBAccessMessage(int oob_distance);
+char* MallocAndMemsetString(size_t size, char ch);
+char* MallocAndMemsetString(size_t size);
+
+extern char glob1[1];
+extern char glob2[2];
+extern char glob3[3];
+extern char glob4[4];
+extern char glob5[5];
+extern char glob6[6];
+extern char glob7[7];
+extern char glob8[8];
+extern char glob9[9];
+extern char glob10[10];
+extern char glob11[11];
+extern char glob12[12];
+extern char glob13[13];
+extern char glob14[14];
+extern char glob15[15];
+extern char glob16[16];
+extern char glob17[17];
+extern char glob1000[1000];
+extern char glob10000[10000];
+extern char glob100000[100000];
+extern int GlobalsTest(int x);
+
 #endif  // ASAN_TEST_UTILS_H
diff --git a/lib/cmpdi2.c b/lib/cmpdi2.c
index c2b1f69..52634d9 100644
--- a/lib/cmpdi2.c
+++ b/lib/cmpdi2.c
@@ -36,3 +36,16 @@
         return 2;
     return 1;
 }
+
+#ifdef __ARM_EABI__
+/* Returns: if (a <  b) returns -1
+*           if (a == b) returns  0
+*           if (a >  b) returns  1
+*/
+COMPILER_RT_ABI si_int
+__aeabi_lcmp(di_int a, di_int b)
+{
+	return __cmpdi2(a, b) - 1;
+}
+#endif
+
diff --git a/lib/comparedf2.c b/lib/comparedf2.c
index fe35fd8..de67784 100644
--- a/lib/comparedf2.c
+++ b/lib/comparedf2.c
@@ -106,6 +106,8 @@
     }
 }
 
+ARM_EABI_FNALIAS(dcmpun, unorddf2)
+
 int __unorddf2(fp_t a, fp_t b) {
     const rep_t aAbs = toRep(a) & absMask;
     const rep_t bAbs = toRep(b) & absMask;
diff --git a/lib/comparesf2.c b/lib/comparesf2.c
index 3f2e358..c1c3a47 100644
--- a/lib/comparesf2.c
+++ b/lib/comparesf2.c
@@ -106,6 +106,8 @@
     }
 }
 
+ARM_EABI_FNALIAS(fcmpun, unordsf2)
+
 int __unordsf2(fp_t a, fp_t b) {
     const rep_t aAbs = toRep(a) & absMask;
     const rep_t bAbs = toRep(b) & absMask;
diff --git a/lib/fixsfdi.c b/lib/fixsfdi.c
index 8a06690..4f6cfdd 100644
--- a/lib/fixsfdi.c
+++ b/lib/fixsfdi.c
@@ -23,7 +23,7 @@
 
 /* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
 
-ARM_EABI_FNALIAS(d2lz, fixsfdi)
+ARM_EABI_FNALIAS(f2lz, fixsfdi)
 
 COMPILER_RT_ABI di_int
 __fixsfdi(float a)
diff --git a/lib/interception/CMakeLists.txt b/lib/interception/CMakeLists.txt
index 2ebb9be..cd9e6e7 100644
--- a/lib/interception/CMakeLists.txt
+++ b/lib/interception/CMakeLists.txt
@@ -4,44 +4,27 @@
   interception_linux.cc
   interception_mac.cc
   interception_win.cc
+  interception_type_test.cc
   )
 
-set(MACH_OVERRIDE_SOURCES
-  mach_override/mach_override.c
-  )
-
-# Only add this C file if we're building on a Mac. Other source files can be
-# harmlessly compiled on any platform, but the C file is complained about due
-# to pedantic rules about empty translation units.
-if (APPLE)
-  list(APPEND INTERCEPTION_SOURCES ${MACH_OVERRIDE_SOURCES})
-  set_source_files_properties(${MACH_OVERRIDE_SOURCES} PROPERTIES COMPILE_FLAGS "-std=c99 ${INTERCEPTION_CFLAGS}")
-endif ()
+include_directories(..)
 
 set(INTERCEPTION_CFLAGS ${SANITIZER_COMMON_CFLAGS})
 
 if(APPLE)
   # Build universal binary on APPLE.
-  add_library(RTInterception.osx OBJECT ${INTERCEPTION_SOURCES})
-  set_target_compile_flags(RTInterception.osx ${INTERCEPTION_CFLAGS})
-  filter_available_targets(INTERCEPTION_TARGETS x86_64 i386)
-  set_target_properties(RTInterception.osx PROPERTIES
-    OSX_ARCHITECTURES "${INTERCEPTION_TARGETS}")
+  add_compiler_rt_osx_object_library(RTInterception
+    ARCH ${SANITIZER_COMMON_SUPPORTED_ARCH}
+    SOURCES ${INTERCEPTION_SOURCES}
+    CFLAGS ${INTERCEPTION_CFLAGS})
+elseif(ANDROID)
+  add_library(RTInterception.arm.android OBJECT ${INTERCEPTION_SOURCES})
+  set_target_compile_flags(RTInterception.arm.android
+    ${INTERCEPTION_CFLAGS})
 else()
   # Otherwise, build separate libraries for each target.
-  if(CAN_TARGET_X86_64)
-    add_library(RTInterception.x86_64 OBJECT ${INTERCEPTION_SOURCES})
-    set_target_compile_flags(RTInterception.x86_64
-      ${INTERCEPTION_CFLAGS} ${TARGET_X86_64_CFLAGS})
-  endif()
-  if(CAN_TARGET_I386)
-    add_library(RTInterception.i386 OBJECT ${INTERCEPTION_SOURCES})
-    set_target_compile_flags(RTInterception.i386
-      ${INTERCEPTION_CFLAGS} ${TARGET_I386_CFLAGS})
-  endif()
-  if(ANDROID)
-    add_library(RTInterception.arm.android OBJECT ${INTERCEPTION_SOURCES})
-    set_target_compile_flags(RTInterception.arm.android
-      ${INTERCEPTION_CFLAGS})
-  endif()
+  foreach(arch ${SANITIZER_COMMON_SUPPORTED_ARCH})
+    add_compiler_rt_object_library(RTInterception ${arch}
+      SOURCES ${INTERCEPTION_SOURCES} CFLAGS ${INTERCEPTION_CFLAGS})
+  endforeach()
 endif()
diff --git a/lib/interception/Makefile.mk b/lib/interception/Makefile.mk
index 1412a01..88aa6cb 100644
--- a/lib/interception/Makefile.mk
+++ b/lib/interception/Makefile.mk
@@ -8,7 +8,7 @@
 #===------------------------------------------------------------------------===#
 
 ModuleName := interception
-SubDirs := mach_override
+SubDirs :=
 
 Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
 ObjNames := $(Sources:%.cc=%.o)
@@ -17,7 +17,7 @@
 
 # FIXME: use automatic dependencies?
 Dependencies := $(wildcard $(Dir)/*.h)
-Dependencies += $(wildcard $(Dir)/mach_override/*.h)
+Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)
 
 # Define a convenience variable for all the interception functions.
 InterceptionFunctions := $(Sources:%.cc=%)
diff --git a/lib/interception/interception.h b/lib/interception/interception.h
index 7dad07f..2ccc903 100644
--- a/lib/interception/interception.h
+++ b/lib/interception/interception.h
@@ -19,17 +19,16 @@
 # error "Interception doesn't work on this operating system."
 #endif
 
-// How to use this library:
-//      1) Include this header to define your own interceptors
-//         (see details below).
-//      2) Build all *.cc files and link against them.
-// On Mac you will also need to:
-//      3) Provide your own implementation for the following functions:
-//           mach_error_t __interception::allocate_island(void **ptr,
-//                                                      size_t size,
-//                                                      void *hint);
-//           mach_error_t __interception::deallocate_island(void *ptr);
-//         See "interception_mac.h" for more details.
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+// These typedefs should be used only in the interceptor definitions to replace
+// the standard system types (e.g. SSIZE_T instead of ssize_t)
+typedef __sanitizer::uptr    SIZE_T;
+typedef __sanitizer::sptr    SSIZE_T;
+typedef __sanitizer::sptr    PTRDIFF_T;
+typedef __sanitizer::s64     INTMAX_T;
+typedef __sanitizer::OFF_T   OFF_T;
+typedef __sanitizer::OFF64_T OFF64_T;
 
 // How to add an interceptor:
 // Suppose you need to wrap/replace system function (generally, from libc):
@@ -50,14 +49,14 @@
 //      3b) add DECLARE_REAL_AND_INTERCEPTOR(int, foo, const char*, double)
 //          to a header file.
 
-// Notes: 1. Things may not work properly if macro INTERCEPT(...) {...} or
+// Notes: 1. Things may not work properly if macro INTERCEPTOR(...) {...} or
 //           DECLARE_REAL(...) are located inside namespaces.
-//        2. On Mac you can also use: "OVERRIDE_FUNCTION(foo, zoo);" to
+//        2. On Mac you can also use: "OVERRIDE_FUNCTION(foo, zoo)" to
 //           effectively redirect calls from "foo" to "zoo". In this case
 //           you aren't required to implement
 //           INTERCEPTOR(int, foo, const char *bar, double baz) {...}
 //           but instead you'll have to add
-//           DEFINE_REAL(int, foo, const char *bar, double baz) in your
+//           DECLARE_REAL(int, foo, const char *bar, double baz) in your
 //           source file (to define a pointer to overriden function).
 
 // How it works:
@@ -68,29 +67,52 @@
 // we intercept. To resolve this we declare our interceptors with __interceptor_
 // prefix, and then make actual interceptors weak aliases to __interceptor_
 // functions.
+//
 // This is not so on Mac OS, where the two-level namespace makes
 // our replacement functions invisible to other libraries. This may be overcomed
 // using the DYLD_FORCE_FLAT_NAMESPACE, but some errors loading the shared
-// libraries in Chromium were noticed when doing so. Instead we use
-// mach_override, a handy framework for patching functions at runtime.
-// To avoid possible name clashes, our replacement functions have
-// the "wrap_" prefix on Mac.
-// An alternative to function patching is to create a dylib containing a
-// __DATA,__interpose section that associates library functions with their
-// wrappers. When this dylib is preloaded before an executable using
-// DYLD_INSERT_LIBRARIES, it routes all the calls to interposed functions done
-// through stubs to the wrapper functions. Such a library is built with
-// -DMAC_INTERPOSE_FUNCTIONS=1.
-
-#if !defined(MAC_INTERPOSE_FUNCTIONS) || !defined(__APPLE__)
-# define MAC_INTERPOSE_FUNCTIONS 0
-#endif
+// libraries in Chromium were noticed when doing so.
+// Instead we create a dylib containing a __DATA,__interpose section that
+// associates library functions with their wrappers. When this dylib is
+// preloaded before an executable using DYLD_INSERT_LIBRARIES, it routes all
+// the calls to interposed functions done through stubs to the wrapper
+// functions.
+// As it's decided at compile time which functions are to be intercepted on Mac,
+// INTERCEPT_FUNCTION() is effectively a no-op on this system.
 
 #if defined(__APPLE__)
+
+// A { replacement, original } pointer pair stored in __DATA,__interpose.
+struct interpose_substitution {
+  const __sanitizer::uptr replacement;
+  const __sanitizer::uptr original;
+};
+
+// For a function foo() create a global pair of pointers { wrap_foo, foo } in
+// the __DATA,__interpose section.
+// As a result all the calls to foo() will be routed to wrap_foo() at runtime.
+#define INTERPOSER(func_name) __attribute__((used)) \
+const interpose_substitution substitution_##func_name[] \
+    __attribute__((section("__DATA, __interpose"))) = { \
+    { reinterpret_cast<const __sanitizer::uptr>(WRAP(func_name)), \
+      reinterpret_cast<const __sanitizer::uptr>(func_name) } \
+}
+
+// For a function foo() and a wrapper function bar() create a global pair
+// of pointers { bar, foo } in the __DATA,__interpose section.
+// As a result all the calls to foo() will be routed to bar() at runtime.
+#define INTERPOSER_2(func_name, wrapper_name) __attribute__((used)) \
+const interpose_substitution substitution_##func_name[] \
+    __attribute__((section("__DATA, __interpose"))) = { \
+    { reinterpret_cast<const __sanitizer::uptr>(wrapper_name), \
+      reinterpret_cast<const __sanitizer::uptr>(func_name) } \
+}
+
 # define WRAP(x) wrap_##x
 # define WRAPPER_NAME(x) "wrap_"#x
 # define INTERCEPTOR_ATTRIBUTE
 # define DECLARE_WRAPPER(ret_type, func, ...)
+
 #elif defined(_WIN32)
 # if defined(_DLL)  // DLL CRT
 #  define WRAP(x) x
@@ -111,7 +133,7 @@
     __attribute__((weak, alias("__interceptor_" #func), visibility("default")));
 #endif
 
-#if !MAC_INTERPOSE_FUNCTIONS
+#if !defined(__APPLE__)
 # define PTR_TO_REAL(x) real_##x
 # define REAL(x) __interception::PTR_TO_REAL(x)
 # define FUNC_TYPE(x) x##_f
@@ -121,11 +143,11 @@
     namespace __interception { \
       extern FUNC_TYPE(func) PTR_TO_REAL(func); \
     }
-#else  // MAC_INTERPOSE_FUNCTIONS
+#else  // __APPLE__
 # define REAL(x) x
 # define DECLARE_REAL(ret_type, func, ...) \
     extern "C" ret_type func(__VA_ARGS__);
-#endif  // MAC_INTERPOSE_FUNCTIONS
+#endif  // __APPLE__
 
 #define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) \
   DECLARE_REAL(ret_type, func, __VA_ARGS__) \
@@ -135,7 +157,7 @@
 // macros does its job. In exceptional cases you may need to call REAL(foo)
 // without defining INTERCEPTOR(..., foo, ...). For example, if you override
 // foo with an interceptor for other function.
-#if !MAC_INTERPOSE_FUNCTIONS
+#if !defined(__APPLE__)
 # define DEFINE_REAL(ret_type, func, ...) \
     typedef ret_type (*FUNC_TYPE(func))(__VA_ARGS__); \
     namespace __interception { \
@@ -145,12 +167,24 @@
 # define DEFINE_REAL(ret_type, func, ...)
 #endif
 
+#if !defined(__APPLE__)
 #define INTERCEPTOR(ret_type, func, ...) \
   DEFINE_REAL(ret_type, func, __VA_ARGS__) \
   DECLARE_WRAPPER(ret_type, func, __VA_ARGS__) \
   extern "C" \
   INTERCEPTOR_ATTRIBUTE \
   ret_type WRAP(func)(__VA_ARGS__)
+#else  // __APPLE__
+#define INTERCEPTOR(ret_type, func, ...) \
+  extern "C" ret_type func(__VA_ARGS__); \
+  extern "C" ret_type WRAP(func)(__VA_ARGS__); \
+  INTERPOSER(func); \
+  extern "C" INTERCEPTOR_ATTRIBUTE ret_type WRAP(func)(__VA_ARGS__)
+
+// Override |overridee| with |overrider|.
+#define OVERRIDE_FUNCTION(overridee, overrider) \
+  INTERPOSER_2(overridee, WRAP(overrider))
+#endif
 
 #if defined(_WIN32)
 # define INTERCEPTOR_WINAPI(ret_type, func, ...) \
@@ -184,8 +218,6 @@
 # define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_LINUX(func)
 #elif defined(__APPLE__)
 # include "interception_mac.h"
-# define OVERRIDE_FUNCTION(old_func, new_func) \
-    OVERRIDE_FUNCTION_MAC(old_func, new_func)
 # define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_MAC(func)
 #else  // defined(_WIN32)
 # include "interception_win.h"
diff --git a/lib/interception/interception_mac.cc b/lib/interception/interception_mac.cc
index 2c10a71..b035cf9 100644
--- a/lib/interception/interception_mac.cc
+++ b/lib/interception/interception_mac.cc
@@ -15,17 +15,6 @@
 #ifdef __APPLE__
 
 #include "interception.h"
-#include "mach_override/mach_override.h"
 
-namespace __interception {
-bool OverrideFunction(uptr old_func, uptr new_func, uptr *orig_old_func) {
-  *orig_old_func = 0;
-  int res = __asan_mach_override_ptr_custom((void*)old_func, (void*)new_func,
-                                            (void**)orig_old_func,
-                                            __interception_allocate_island,
-                                            __interception_deallocate_island);
-  return (res == 0) && (*orig_old_func != 0);
-}
-}  // namespace __interception
 
 #endif  // __APPLE__
diff --git a/lib/interception/interception_mac.h b/lib/interception/interception_mac.h
index 6e9e808..5059489 100644
--- a/lib/interception/interception_mac.h
+++ b/lib/interception/interception_mac.h
@@ -21,29 +21,7 @@
 #ifndef INTERCEPTION_MAC_H
 #define INTERCEPTION_MAC_H
 
-#include <mach/mach_error.h>
-#include <stddef.h>
-
-// Allocate memory for the escape island. This cannot be moved to
-// mach_override, because each user of interceptors may specify its
-// own memory range for escape islands.
-extern "C" {
-mach_error_t __interception_allocate_island(void **ptr, size_t unused_size,
-                                            void *unused_hint);
-mach_error_t __interception_deallocate_island(void *ptr);
-}  // extern "C"
-
-namespace __interception {
-// returns true if the old function existed.
-bool OverrideFunction(uptr old_func, uptr new_func, uptr *orig_old_func);
-}  // namespace __interception
-
-# define OVERRIDE_FUNCTION_MAC(old_func, new_func) \
-    ::__interception::OverrideFunction( \
-          (::__interception::uptr)old_func, \
-          (::__interception::uptr)new_func, \
-          (::__interception::uptr*)((::__interception::uptr)&REAL(old_func)))
-# define INTERCEPT_FUNCTION_MAC(func) OVERRIDE_FUNCTION_MAC(func, WRAP(func))
+#define INTERCEPT_FUNCTION_MAC(func)
 
 #endif  // INTERCEPTION_MAC_H
 #endif  // __APPLE__
diff --git a/lib/interception/interception_type_test.cc b/lib/interception/interception_type_test.cc
new file mode 100644
index 0000000..7b79b78
--- /dev/null
+++ b/lib/interception/interception_type_test.cc
@@ -0,0 +1,39 @@
+//===-- interception_type_test.cc -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Compile-time tests of the internal type definitions.
+//===----------------------------------------------------------------------===//
+
+#if defined(__linux__) || defined(__APPLE__)
+
+#include "interception.h"
+#include <sys/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+COMPILER_CHECK(sizeof(SIZE_T) == sizeof(size_t));
+COMPILER_CHECK(sizeof(SSIZE_T) == sizeof(ssize_t));
+COMPILER_CHECK(sizeof(PTRDIFF_T) == sizeof(ptrdiff_t));
+COMPILER_CHECK(sizeof(INTMAX_T) == sizeof(intmax_t));
+
+#ifndef __APPLE__
+COMPILER_CHECK(sizeof(OFF64_T) == sizeof(off64_t));
+#endif
+
+// The following are the cases when pread (and friends) is used instead of
+// pread64. In those cases we need OFF_T to match off_t. We don't care about the
+// rest (they depend on _FILE_OFFSET_BITS setting when building an application).
+# if defined(__ANDROID__) || !defined _FILE_OFFSET_BITS || \
+  _FILE_OFFSET_BITS != 64
+COMPILER_CHECK(sizeof(OFF_T) == sizeof(off_t));
+# endif
+
+#endif
diff --git a/lib/interception/mach_override/LICENSE.TXT b/lib/interception/mach_override/LICENSE.TXT
deleted file mode 100644
index 9446965..0000000
--- a/lib/interception/mach_override/LICENSE.TXT
+++ /dev/null
@@ -1,3 +0,0 @@
-Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com>
-Some rights reserved: <http://opensource.org/licenses/mit-license.php>
-
diff --git a/lib/interception/mach_override/Makefile.mk b/lib/interception/mach_override/Makefile.mk
deleted file mode 100644
index 8f5ebda..0000000
--- a/lib/interception/mach_override/Makefile.mk
+++ /dev/null
@@ -1,22 +0,0 @@
-#===- lib/interception/mach_override/Makefile.mk -----------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := interception
-SubDirs :=
-
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o)
-
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
-
-# Define a convenience variable for all the interception functions.
-InterceptionFunctions += $(Sources:%.c=%)
diff --git a/lib/interception/mach_override/README.txt b/lib/interception/mach_override/README.txt
deleted file mode 100644
index 5f62ad7..0000000
--- a/lib/interception/mach_override/README.txt
+++ /dev/null
@@ -1,9 +0,0 @@
--- mach_override.c is taken from upstream version at
- https://github.com/rentzsch/mach_star/tree/f8e0c424b5be5cb641ded67c265e616157ae4bcf
--- Added debugging code under DEBUG_DISASM.
--- The files are guarded with #ifdef __APPLE__
--- some opcodes are added in order to parse the library functions on Lion
--- fixupInstructions() is extended to relocate relative calls, not only jumps
--- mach_override_ptr is renamed to __asan_mach_override_ptr and
- other functions are marked as hidden.
-
diff --git a/lib/interception/mach_override/mach_override.c b/lib/interception/mach_override/mach_override.c
deleted file mode 100644
index 7511a7b..0000000
--- a/lib/interception/mach_override/mach_override.c
+++ /dev/null
@@ -1,970 +0,0 @@
-/*******************************************************************************
-	mach_override.c
-		Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com>
-		Some rights reserved: <http://opensource.org/licenses/mit-license.php>
-
-	***************************************************************************/
-#ifdef __APPLE__
-
-#include "mach_override.h"
-
-#include <mach-o/dyld.h>
-#include <mach/mach_host.h>
-#include <mach/mach_init.h>
-#include <mach/vm_map.h>
-#include <sys/mman.h>
-
-#include <CoreServices/CoreServices.h>
-
-//#define DEBUG_DISASM 1
-#undef DEBUG_DISASM
-
-/**************************
-*	
-*	Constants
-*	
-**************************/
-#pragma mark	-
-#pragma mark	(Constants)
-
-#if defined(__ppc__) || defined(__POWERPC__)
-
-static
-long kIslandTemplate[] = {
-	0x9001FFFC,	//	stw		r0,-4(SP)
-	0x3C00DEAD,	//	lis		r0,0xDEAD
-	0x6000BEEF,	//	ori		r0,r0,0xBEEF
-	0x7C0903A6,	//	mtctr	r0
-	0x8001FFFC,	//	lwz		r0,-4(SP)
-	0x60000000,	//	nop		; optionally replaced
-	0x4E800420 	//	bctr
-};
-
-#define kAddressHi			3
-#define kAddressLo			5
-#define kInstructionHi		10
-#define kInstructionLo		11
-
-#elif defined(__i386__) 
-
-#define kOriginalInstructionsSize 16
-
-static
-unsigned char kIslandTemplate[] = {
-	// kOriginalInstructionsSize nop instructions so that we 
-	// should have enough space to host original instructions 
-	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 
-	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
-	// Now the real jump instruction
-	0xE9, 0xEF, 0xBE, 0xAD, 0xDE
-};
-
-#define kInstructions	0
-#define kJumpAddress    kInstructions + kOriginalInstructionsSize + 1
-#elif defined(__x86_64__)
-
-#define kOriginalInstructionsSize 32
-
-#define kJumpAddress    kOriginalInstructionsSize + 6
-
-static
-unsigned char kIslandTemplate[] = {
-	// kOriginalInstructionsSize nop instructions so that we 
-	// should have enough space to host original instructions 
-	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 
-	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
-	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 
-	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
-	// Now the real jump instruction
-	0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,
-        0x00, 0x00, 0x00, 0x00,
-        0x00, 0x00, 0x00, 0x00
-};
-
-#endif
-
-#define	kAllocateHigh		1
-#define	kAllocateNormal		0
-
-/**************************
-*	
-*	Data Types
-*	
-**************************/
-#pragma mark	-
-#pragma mark	(Data Types)
-
-typedef	struct	{
-	char	instructions[sizeof(kIslandTemplate)];
-	int		allocatedHigh;
-}	BranchIsland;
-
-/**************************
-*	
-*	Funky Protos
-*	
-**************************/
-#pragma mark	-
-#pragma mark	(Funky Protos)
-
-
-	static mach_error_t
-allocateBranchIsland(
-		BranchIsland	**island,
-		int				allocateHigh,
-		void *originalFunctionAddress);
-
-	static mach_error_t
-freeBranchIsland(
-		BranchIsland	*island );
-
-	static mach_error_t
-defaultIslandMalloc(
-	  void **ptr, size_t unused_size, void *hint);
-
-	static mach_error_t
-defaultIslandFree(
-   	void *ptr);
-
-#if defined(__ppc__) || defined(__POWERPC__)
-	static mach_error_t
-setBranchIslandTarget(
-		BranchIsland	*island,
-		const void		*branchTo,
-		long			instruction );
-#endif 
-
-#if defined(__i386__) || defined(__x86_64__)
-static mach_error_t
-setBranchIslandTarget_i386(
-						   BranchIsland	*island,
-						   const void		*branchTo,
-						   char*			instructions );
-// Can't be made static because there's no C implementation for atomic_mov64
-// on i386.
-void 
-atomic_mov64(
-		uint64_t *targetAddress,
-		uint64_t value ) __attribute__((visibility("hidden")));
-
-	static Boolean 
-eatKnownInstructions( 
-	unsigned char	*code, 
-	uint64_t		*newInstruction,
-	int				*howManyEaten, 
-	char			*originalInstructions,
-	int				*originalInstructionCount, 
-	uint8_t			*originalInstructionSizes );
-
-	static void
-fixupInstructions(
-    void		*originalFunction,
-    void		*escapeIsland,
-    void		*instructionsToFix,
-	int			instructionCount,
-	uint8_t		*instructionSizes );
-
-#ifdef DEBUG_DISASM
-	static void
-dump16Bytes(
-	void	*ptr);
-#endif  // DEBUG_DISASM
-#endif
-
-/*******************************************************************************
-*	
-*	Interface
-*	
-*******************************************************************************/
-#pragma mark	-
-#pragma mark	(Interface)
-
-#if defined(__i386__) || defined(__x86_64__)
-static mach_error_t makeIslandExecutable(void *address) {
-	mach_error_t err = err_none;
-    vm_size_t pageSize;
-    host_page_size( mach_host_self(), &pageSize );
-    uintptr_t page = (uintptr_t)address & ~(uintptr_t)(pageSize-1);
-    int e = err_none;
-    e |= mprotect((void *)page, pageSize, PROT_EXEC | PROT_READ | PROT_WRITE);
-    e |= msync((void *)page, pageSize, MS_INVALIDATE );
-    if (e) {
-        err = err_cannot_override;
-    }
-    return err;
-}
-#endif
-
-		static mach_error_t
-defaultIslandMalloc(
-	void **ptr, size_t unused_size, void *hint) {
-  return allocateBranchIsland( (BranchIsland**)ptr, kAllocateHigh, hint );
-}
-		static mach_error_t
-defaultIslandFree(
-	void *ptr) {
-	return freeBranchIsland(ptr);
-}
-
-    mach_error_t
-__asan_mach_override_ptr(
-	void *originalFunctionAddress,
-    const void *overrideFunctionAddress,
-    void **originalFunctionReentryIsland )
-{
-  return __asan_mach_override_ptr_custom(originalFunctionAddress,
-		overrideFunctionAddress,
-		originalFunctionReentryIsland,
-		defaultIslandMalloc,
-		defaultIslandFree);
-}
-
-    mach_error_t
-__asan_mach_override_ptr_custom(
-	void *originalFunctionAddress,
-    const void *overrideFunctionAddress,
-    void **originalFunctionReentryIsland,
-		island_malloc *alloc,
-		island_free *dealloc)
-{
-	assert( originalFunctionAddress );
-	assert( overrideFunctionAddress );
-	
-	// this addresses overriding such functions as AudioOutputUnitStart()
-	// test with modified DefaultOutputUnit project
-#if defined(__x86_64__)
-    for(;;){
-        if(*(uint16_t*)originalFunctionAddress==0x25FF)    // jmp qword near [rip+0x????????]
-            originalFunctionAddress=*(void**)((char*)originalFunctionAddress+6+*(int32_t *)((uint16_t*)originalFunctionAddress+1));
-        else break;
-    }
-#elif defined(__i386__)
-    for(;;){
-        if(*(uint16_t*)originalFunctionAddress==0x25FF)    // jmp *0x????????
-            originalFunctionAddress=**(void***)((uint16_t*)originalFunctionAddress+1);
-        else break;
-    }
-#endif
-#ifdef DEBUG_DISASM
-  {
-    fprintf(stderr, "Replacing function at %p\n", originalFunctionAddress);
-    fprintf(stderr, "First 16 bytes of the function: ");
-    unsigned char *orig = (unsigned char *)originalFunctionAddress;
-    int i;
-    for (i = 0; i < 16; i++) {
-       fprintf(stderr, "%x ", (unsigned int) orig[i]);
-    }
-    fprintf(stderr, "\n");
-    fprintf(stderr, 
-            "To disassemble, save the following function as disas.c"
-            " and run:\n  gcc -c disas.c && gobjdump -d disas.o\n"
-            "The first 16 bytes of the original function will start"
-            " after four nop instructions.\n");
-    fprintf(stderr, "\nvoid foo() {\n  asm volatile(\"nop;nop;nop;nop;\");\n");
-    int j = 0;
-    for (j = 0; j < 2; j++) {
-      fprintf(stderr, "  asm volatile(\".byte ");
-      for (i = 8 * j; i < 8 * (j+1) - 1; i++) {
-        fprintf(stderr, "0x%x, ", (unsigned int) orig[i]);
-      }
-      fprintf(stderr, "0x%x;\");\n", (unsigned int) orig[8 * (j+1) - 1]);
-    }
-    fprintf(stderr, "}\n\n");
-  }
-#endif
-
-	long	*originalFunctionPtr = (long*) originalFunctionAddress;
-	mach_error_t	err = err_none;
-	
-#if defined(__ppc__) || defined(__POWERPC__)
-	//	Ensure first instruction isn't 'mfctr'.
-	#define	kMFCTRMask			0xfc1fffff
-	#define	kMFCTRInstruction	0x7c0903a6
-	
-	long	originalInstruction = *originalFunctionPtr;
-	if( !err && ((originalInstruction & kMFCTRMask) == kMFCTRInstruction) )
-		err = err_cannot_override;
-#elif defined(__i386__) || defined(__x86_64__)
-	int eatenCount = 0;
-	int originalInstructionCount = 0;
-	char originalInstructions[kOriginalInstructionsSize];
-	uint8_t originalInstructionSizes[kOriginalInstructionsSize];
-	uint64_t jumpRelativeInstruction = 0; // JMP
-
-	Boolean overridePossible = eatKnownInstructions ((unsigned char *)originalFunctionPtr, 
-										&jumpRelativeInstruction, &eatenCount, 
-										originalInstructions, &originalInstructionCount, 
-										originalInstructionSizes );
-#ifdef DEBUG_DISASM
-  if (!overridePossible) fprintf(stderr, "overridePossible = false @%d\n", __LINE__);
-#endif
-	if (eatenCount > kOriginalInstructionsSize) {
-#ifdef DEBUG_DISASM
-		fprintf(stderr, "Too many instructions eaten\n");
-#endif    
-		overridePossible = false;
-	}
-	if (!overridePossible) err = err_cannot_override;
-	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
-#endif
-	
-	//	Make the original function implementation writable.
-	if( !err ) {
-		err = vm_protect( mach_task_self(),
-				(vm_address_t) originalFunctionPtr, 8, false,
-				(VM_PROT_ALL | VM_PROT_COPY) );
-		if( err )
-			err = vm_protect( mach_task_self(),
-					(vm_address_t) originalFunctionPtr, 8, false,
-					(VM_PROT_DEFAULT | VM_PROT_COPY) );
-	}
-	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
-	
-	//	Allocate and target the escape island to the overriding function.
-	BranchIsland	*escapeIsland = NULL;
-	if( !err )
-		err = alloc( (void**)&escapeIsland, sizeof(BranchIsland), originalFunctionAddress );
-	if ( err ) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
-	
-#if defined(__ppc__) || defined(__POWERPC__)
-	if( !err )
-		err = setBranchIslandTarget( escapeIsland, overrideFunctionAddress, 0 );
-	
-	//	Build the branch absolute instruction to the escape island.
-	long	branchAbsoluteInstruction = 0; // Set to 0 just to silence warning.
-	if( !err ) {
-		long escapeIslandAddress = ((long) escapeIsland) & 0x3FFFFFF;
-		branchAbsoluteInstruction = 0x48000002 | escapeIslandAddress;
-	}
-#elif defined(__i386__) || defined(__x86_64__)
-        if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
-
-	if( !err )
-		err = setBranchIslandTarget_i386( escapeIsland, overrideFunctionAddress, 0 );
- 
-	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
-	// Build the jump relative instruction to the escape island
-#endif
-
-
-#if defined(__i386__) || defined(__x86_64__)
-	if (!err) {
-		uint32_t addressOffset = ((char*)escapeIsland - (char*)originalFunctionPtr - 5);
-		addressOffset = OSSwapInt32(addressOffset);
-		
-		jumpRelativeInstruction |= 0xE900000000000000LL; 
-		jumpRelativeInstruction |= ((uint64_t)addressOffset & 0xffffffff) << 24;
-		jumpRelativeInstruction = OSSwapInt64(jumpRelativeInstruction);		
-	}
-#endif
-	
-	//	Optionally allocate & return the reentry island. This may contain relocated
-	//  jmp instructions and so has all the same addressing reachability requirements
-	//  the escape island has to the original function, except the escape island is
-	//  technically our original function.
-	BranchIsland	*reentryIsland = NULL;
-	if( !err && originalFunctionReentryIsland ) {
-		err = alloc( (void**)&reentryIsland, sizeof(BranchIsland), escapeIsland);
-		if( !err )
-			*originalFunctionReentryIsland = reentryIsland;
-	}
-	
-#if defined(__ppc__) || defined(__POWERPC__)	
-	//	Atomically:
-	//	o If the reentry island was allocated:
-	//		o Insert the original instruction into the reentry island.
-	//		o Target the reentry island at the 2nd instruction of the
-	//		  original function.
-	//	o Replace the original instruction with the branch absolute.
-	if( !err ) {
-		int escapeIslandEngaged = false;
-		do {
-			if( reentryIsland )
-				err = setBranchIslandTarget( reentryIsland,
-						(void*) (originalFunctionPtr+1), originalInstruction );
-			if( !err ) {
-				escapeIslandEngaged = CompareAndSwap( originalInstruction,
-										branchAbsoluteInstruction,
-										(UInt32*)originalFunctionPtr );
-				if( !escapeIslandEngaged ) {
-					//	Someone replaced the instruction out from under us,
-					//	re-read the instruction, make sure it's still not
-					//	'mfctr' and try again.
-					originalInstruction = *originalFunctionPtr;
-					if( (originalInstruction & kMFCTRMask) == kMFCTRInstruction)
-						err = err_cannot_override;
-				}
-			}
-		} while( !err && !escapeIslandEngaged );
-	}
-#elif defined(__i386__) || defined(__x86_64__)
-	// Atomically:
-	//	o If the reentry island was allocated:
-	//		o Insert the original instructions into the reentry island.
-	//		o Target the reentry island at the first non-replaced 
-	//        instruction of the original function.
-	//	o Replace the original first instructions with the jump relative.
-	//
-	// Note that on i386, we do not support someone else changing the code under our feet
-	if ( !err ) {
-		fixupInstructions(originalFunctionPtr, reentryIsland, originalInstructions,
-					originalInstructionCount, originalInstructionSizes );
-	
-		if( reentryIsland )
-			err = setBranchIslandTarget_i386( reentryIsland,
-										 (void*) ((char *)originalFunctionPtr+eatenCount), originalInstructions );
-		// try making islands executable before planting the jmp
-#if defined(__x86_64__) || defined(__i386__)
-        if( !err )
-            err = makeIslandExecutable(escapeIsland);
-        if( !err && reentryIsland )
-            err = makeIslandExecutable(reentryIsland);
-#endif
-		if ( !err )
-			atomic_mov64((uint64_t *)originalFunctionPtr, jumpRelativeInstruction);
-	}
-#endif
-	
-	//	Clean up on error.
-	if( err ) {
-		if( reentryIsland )
-			dealloc( reentryIsland );
-		if( escapeIsland )
-			dealloc( escapeIsland );
-	}
-
-#ifdef DEBUG_DISASM
-  {
-    fprintf(stderr, "First 16 bytes of the function after slicing: ");
-    unsigned char *orig = (unsigned char *)originalFunctionAddress;
-    int i;
-    for (i = 0; i < 16; i++) {
-       fprintf(stderr, "%x ", (unsigned int) orig[i]);
-    }
-    fprintf(stderr, "\n");
-  }
-#endif
-	return err;
-}
-
-/*******************************************************************************
-*	
-*	Implementation
-*	
-*******************************************************************************/
-#pragma mark	-
-#pragma mark	(Implementation)
-
-/***************************************************************************//**
-	Implementation: Allocates memory for a branch island.
-	
-	@param	island			<-	The allocated island.
-	@param	allocateHigh	->	Whether to allocate the island at the end of the
-								address space (for use with the branch absolute
-								instruction).
-	@result					<-	mach_error_t
-
-	***************************************************************************/
-
-	static mach_error_t
-allocateBranchIsland(
-		BranchIsland	**island,
-		int				allocateHigh,
-		void *originalFunctionAddress)
-{
-	assert( island );
-	
-	mach_error_t	err = err_none;
-	
-	if( allocateHigh ) {
-		vm_size_t pageSize;
-		err = host_page_size( mach_host_self(), &pageSize );
-		if( !err ) {
-			assert( sizeof( BranchIsland ) <= pageSize );
-#if defined(__ppc__) || defined(__POWERPC__)
-			vm_address_t first = 0xfeffffff;
-			vm_address_t last = 0xfe000000 + pageSize;
-#elif defined(__x86_64__)
-			vm_address_t first = ((uint64_t)originalFunctionAddress & ~(uint64_t)(((uint64_t)1 << 31) - 1)) | ((uint64_t)1 << 31); // start in the middle of the page?
-			vm_address_t last = 0x0;
-#else
-			vm_address_t first = 0xffc00000;
-			vm_address_t last = 0xfffe0000;
-#endif
-
-			vm_address_t page = first;
-			int allocated = 0;
-			vm_map_t task_self = mach_task_self();
-			
-			while( !err && !allocated && page != last ) {
-
-				err = vm_allocate( task_self, &page, pageSize, 0 );
-				if( err == err_none )
-					allocated = 1;
-				else if( err == KERN_NO_SPACE ) {
-#if defined(__x86_64__)
-					page -= pageSize;
-#else
-					page += pageSize;
-#endif
-					err = err_none;
-				}
-			}
-			if( allocated )
-				*island = (BranchIsland*) page;
-			else if( !allocated && !err )
-				err = KERN_NO_SPACE;
-		}
-	} else {
-		void *block = malloc( sizeof( BranchIsland ) );
-		if( block )
-			*island = block;
-		else
-			err = KERN_NO_SPACE;
-	}
-	if( !err )
-		(**island).allocatedHigh = allocateHigh;
-	
-	return err;
-}
-
-/***************************************************************************//**
-	Implementation: Deallocates memory for a branch island.
-	
-	@param	island	->	The island to deallocate.
-	@result			<-	mach_error_t
-
-	***************************************************************************/
-
-	static mach_error_t
-freeBranchIsland(
-		BranchIsland	*island )
-{
-	assert( island );
-	assert( (*(long*)&island->instructions[0]) == kIslandTemplate[0] );
-	assert( island->allocatedHigh );
-	
-	mach_error_t	err = err_none;
-	
-	if( island->allocatedHigh ) {
-		vm_size_t pageSize;
-		err = host_page_size( mach_host_self(), &pageSize );
-		if( !err ) {
-			assert( sizeof( BranchIsland ) <= pageSize );
-			err = vm_deallocate(
-					mach_task_self(),
-					(vm_address_t) island, pageSize );
-		}
-	} else {
-		free( island );
-	}
-	
-	return err;
-}
-
-/***************************************************************************//**
-	Implementation: Sets the branch island's target, with an optional
-	instruction.
-	
-	@param	island		->	The branch island to insert target into.
-	@param	branchTo	->	The address of the target.
-	@param	instruction	->	Optional instruction to execute prior to branch. Set
-							to zero for nop.
-	@result				<-	mach_error_t
-
-	***************************************************************************/
-#if defined(__ppc__) || defined(__POWERPC__)
-	static mach_error_t
-setBranchIslandTarget(
-		BranchIsland	*island,
-		const void		*branchTo,
-		long			instruction )
-{
-	//	Copy over the template code.
-    bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );
-    
-    //	Fill in the address.
-    ((short*)island->instructions)[kAddressLo] = ((long) branchTo) & 0x0000FFFF;
-    ((short*)island->instructions)[kAddressHi]
-    	= (((long) branchTo) >> 16) & 0x0000FFFF;
-    
-    //	Fill in the (optional) instuction.
-    if( instruction != 0 ) {
-        ((short*)island->instructions)[kInstructionLo]
-        	= instruction & 0x0000FFFF;
-        ((short*)island->instructions)[kInstructionHi]
-        	= (instruction >> 16) & 0x0000FFFF;
-    }
-    
-    //MakeDataExecutable( island->instructions, sizeof( kIslandTemplate ) );
-	msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );
-    
-    return err_none;
-}
-#endif 
-
-#if defined(__i386__)
-	static mach_error_t
-setBranchIslandTarget_i386(
-	BranchIsland	*island,
-	const void		*branchTo,
-	char*			instructions )
-{
-
-	//	Copy over the template code.
-    bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );
-
-	// copy original instructions
-	if (instructions) {
-		bcopy (instructions, island->instructions + kInstructions, kOriginalInstructionsSize);
-	}
-	
-    // Fill in the address.
-    int32_t addressOffset = (char *)branchTo - (island->instructions + kJumpAddress + 4);
-    *((int32_t *)(island->instructions + kJumpAddress)) = addressOffset; 
-
-    msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );
-    return err_none;
-}
-
-#elif defined(__x86_64__)
-static mach_error_t
-setBranchIslandTarget_i386(
-        BranchIsland	*island,
-        const void		*branchTo,
-        char*			instructions )
-{
-    // Copy over the template code.
-    bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );
-
-    // Copy original instructions.
-    if (instructions) {
-        bcopy (instructions, island->instructions, kOriginalInstructionsSize);
-    }
-
-    //	Fill in the address.
-    *((uint64_t *)(island->instructions + kJumpAddress)) = (uint64_t)branchTo; 
-    msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );
-
-    return err_none;
-}
-#endif
-
-
-#if defined(__i386__) || defined(__x86_64__)
-// simplistic instruction matching
-typedef struct {
-	unsigned int length; // max 15
-	unsigned char mask[15]; // sequence of bytes in memory order
-	unsigned char constraint[15]; // sequence of bytes in memory order
-}	AsmInstructionMatch;
-
-#if defined(__i386__)
-static AsmInstructionMatch possibleInstructions[] = {
-	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} },	// jmp 0x????????
-	{ 0x5, {0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, {0x55, 0x89, 0xe5, 0xc9, 0xc3} },	// push %esp; mov %esp,%ebp; leave; ret
-	{ 0x1, {0xFF}, {0x90} },							// nop
-	{ 0x1, {0xF8}, {0x50} },							// push %reg
-	{ 0x2, {0xFF, 0xFF}, {0x89, 0xE5} },				                // mov %esp,%ebp
-	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x89, 0x1C, 0x24} },				                // mov %ebx,(%esp)
-	{ 0x3, {0xFF, 0xFF, 0x00}, {0x83, 0xEC, 0x00} },	                        // sub 0x??, %esp
-	{ 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}, {0x81, 0xEC, 0x00, 0x00, 0x00, 0x00} },	// sub 0x??, %esp with 32bit immediate
-	{ 0x2, {0xFF, 0xFF}, {0x31, 0xC0} },						// xor %eax, %eax
-	{ 0x3, {0xFF, 0x4F, 0x00}, {0x8B, 0x45, 0x00} },  // mov $imm(%ebp), %reg
-	{ 0x3, {0xFF, 0x4C, 0x00}, {0x8B, 0x40, 0x00} },  // mov $imm(%eax-%edx), %reg
-	{ 0x3, {0xFF, 0xCF, 0x00}, {0x8B, 0x4D, 0x00} },  // mov $imm(%rpb), %reg
-	{ 0x3, {0xFF, 0x4F, 0x00}, {0x8A, 0x4D, 0x00} },  // mov $imm(%ebp), %cl
-	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x8B, 0x4C, 0x24, 0x00} },  			// mov $imm(%esp), %ecx
-	{ 0x4, {0xFF, 0x00, 0x00, 0x00}, {0x8B, 0x00, 0x00, 0x00} },  			// mov r16,r/m16 or r32,r/m32
-	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xB9, 0x00, 0x00, 0x00, 0x00} }, 	// mov $imm, %ecx
-	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} }, 	// mov $imm, %eax
-	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x66, 0x0F, 0xEF, 0x00} },             	// pxor xmm2/128, xmm1
-	{ 0x2, {0xFF, 0xFF}, {0xDB, 0xE3} }, 						// fninit
-	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE8, 0x00, 0x00, 0x00, 0x00} },	// call $imm
-	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x0F, 0xBE, 0x55, 0x00} },                    // movsbl $imm(%ebp), %edx
-	{ 0x0, {0x00}, {0x00} }
-};
-#elif defined(__x86_64__)
-// TODO(glider): disassembling the "0x48, 0x89" sequences is trickier than it's done below.
-// If it stops working, refer to http://ref.x86asm.net/geek.html#modrm_byte_32_64 to do it
-// more accurately.
-// Note: 0x48 is in fact the REX.W prefix, but it might be wrong to treat it as a separate
-// instruction.
-static AsmInstructionMatch possibleInstructions[] = {
-	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} },	// jmp 0x????????
-	{ 0x1, {0xFF}, {0x90} },							// nop
-	{ 0x1, {0xF8}, {0x50} },							// push %rX
-	{ 0x1, {0xFF}, {0x65} },							// GS prefix
-	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x48, 0x89, 0xE5} },				// mov %rsp,%rbp
-	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x48, 0x83, 0xEC, 0x00} },	                // sub 0x??, %rsp
-	{ 0x4, {0xFB, 0xFF, 0x07, 0x00}, {0x48, 0x89, 0x05, 0x00} },	                // move onto rbp
-	{ 0x3, {0xFB, 0xFF, 0x00}, {0x48, 0x89, 0x00} },	                            // mov %reg, %reg
-	{ 0x3, {0xFB, 0xFF, 0x00}, {0x49, 0x89, 0x00} },	                            // mov %reg, %reg (REX.WB)
-	{ 0x2, {0xFF, 0x00}, {0x41, 0x00} },						// push %rXX
-	{ 0x2, {0xFF, 0x00}, {0x84, 0x00} },						// test %rX8,%rX8
-	{ 0x2, {0xFF, 0x00}, {0x85, 0x00} },						// test %rX,%rX
-	{ 0x2, {0xFF, 0x00}, {0x77, 0x00} },						// ja $i8
-	{ 0x2, {0xFF, 0x00}, {0x74, 0x00} },						// je $i8
-	{ 0x5, {0xF8, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} },	// mov $imm, %reg
-	{ 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} },				// pushq $imm(%rdi)
-	{ 0x2, {0xFF, 0xFF}, {0x31, 0xC0} },						// xor %eax, %eax
-	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0x25, 0x00, 0x00, 0x00, 0x00} },	// and $imm, %eax
-	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x80, 0x3F, 0x00} },				// cmpb $imm, (%rdi)
-
-  { 0x8, {0xFF, 0xFF, 0xCF, 0xFF, 0x00, 0x00, 0x00, 0x00},
-         {0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00}, },                     // mov $imm, %{rax,rdx,rsp,rsi}
-  { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x48, 0x83, 0xFA, 0x00}, },   // cmp $i8, %rdx
-	{ 0x4, {0xFF, 0xFF, 0x00, 0x00}, {0x83, 0x7f, 0x00, 0x00}, },			// cmpl $imm, $imm(%rdi)
-	{ 0xa, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
-               {0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} },    // mov $imm, %rax
-        { 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00},
-               {0x81, 0xE6, 0x00, 0x00, 0x00, 0x00} },                            // and $imm, %esi
-        { 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00},
-               {0xFF, 0x25, 0x00, 0x00, 0x00, 0x00} },                            // jmpq *(%rip)
-        { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x66, 0x0F, 0xEF, 0x00} },              // pxor xmm2/128, xmm1
-        { 0x2, {0xFF, 0x00}, {0x89, 0x00} },                               // mov r/m32,r32 or r/m16,r16
-        { 0x3, {0xFF, 0xFF, 0xFF}, {0x49, 0x89, 0xF8} },                   // mov %rdi,%r8
-        { 0x4, {0xFF, 0xFF, 0xFF, 0xFF}, {0x40, 0x0F, 0xBE, 0xCE} },       // movsbl %sil,%ecx
-        { 0x7, {0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00},
-               {0x48, 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00} },  // lea $imm(%rip),%rax
-        { 0x3, {0xFF, 0xFF, 0xFF}, {0x0F, 0xBE, 0xCE} },  // movsbl, %dh, %ecx
-        { 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} },  // pushq $imm(%rdi)
-        { 0x2, {0xFF, 0xFF}, {0xDB, 0xE3} }, // fninit
-        { 0x3, {0xFF, 0xFF, 0xFF}, {0x48, 0x85, 0xD2} },  // test %rdx,%rdx
-	{ 0x0, {0x00}, {0x00} }
-};
-#endif
-
-static Boolean codeMatchesInstruction(unsigned char *code, AsmInstructionMatch* instruction) 
-{
-	Boolean match = true;
-	
-	size_t i;
-  assert(instruction);
-#ifdef DEBUG_DISASM
-	fprintf(stderr, "Matching: ");
-#endif  
-	for (i=0; i<instruction->length; i++) {
-		unsigned char mask = instruction->mask[i];
-		unsigned char constraint = instruction->constraint[i];
-		unsigned char codeValue = code[i];
-#ifdef DEBUG_DISASM
-		fprintf(stderr, "%x ", (unsigned)codeValue);
-#endif    
-		match = ((codeValue & mask) == constraint);
-		if (!match) break;
-	}
-#ifdef DEBUG_DISASM
-	if (match) {
-		fprintf(stderr, " OK\n");
-	} else {
-		fprintf(stderr, " FAIL\n");
-	}
-#endif  
-	return match;
-}
-
-#if defined(__i386__) || defined(__x86_64__)
-	static Boolean 
-eatKnownInstructions( 
-	unsigned char	*code, 
-	uint64_t		*newInstruction,
-	int				*howManyEaten, 
-	char			*originalInstructions,
-	int				*originalInstructionCount, 
-	uint8_t			*originalInstructionSizes )
-{
-	Boolean allInstructionsKnown = true;
-	int totalEaten = 0;
-	unsigned char* ptr = code;
-	int remainsToEat = 5; // a JMP instruction takes 5 bytes
-	int instructionIndex = 0;
-	
-	if (howManyEaten) *howManyEaten = 0;
-	if (originalInstructionCount) *originalInstructionCount = 0;
-	while (remainsToEat > 0) {
-		Boolean curInstructionKnown = false;
-		
-		// See if instruction matches one  we know
-		AsmInstructionMatch* curInstr = possibleInstructions;
-		do { 
-			if ((curInstructionKnown = codeMatchesInstruction(ptr, curInstr))) break;
-			curInstr++;
-		} while (curInstr->length > 0);
-		
-		// if all instruction matches failed, we don't know current instruction then, stop here
-		if (!curInstructionKnown) { 
-			allInstructionsKnown = false;
-			fprintf(stderr, "mach_override: some instructions unknown! Need to update mach_override.c\n");
-			break;
-		}
-		
-		// At this point, we've matched curInstr
-		int eaten = curInstr->length;
-		ptr += eaten;
-		remainsToEat -= eaten;
-		totalEaten += eaten;
-		
-		if (originalInstructionSizes) originalInstructionSizes[instructionIndex] = eaten;
-		instructionIndex += 1;
-		if (originalInstructionCount) *originalInstructionCount = instructionIndex;
-	}
-
-
-	if (howManyEaten) *howManyEaten = totalEaten;
-
-	if (originalInstructions) {
-		Boolean enoughSpaceForOriginalInstructions = (totalEaten < kOriginalInstructionsSize);
-		
-		if (enoughSpaceForOriginalInstructions) {
-			memset(originalInstructions, 0x90 /* NOP */, kOriginalInstructionsSize); // fill instructions with NOP
-			bcopy(code, originalInstructions, totalEaten);
-		} else {
-#ifdef DEBUG_DISASM
-			fprintf(stderr, "Not enough space in island to store original instructions. Adapt the island definition and kOriginalInstructionsSize\n");
-#endif      
-			return false;
-		}
-	}
-	
-	if (allInstructionsKnown) {
-		// save last 3 bytes of first 64bits of codre we'll replace
-		uint64_t currentFirst64BitsOfCode = *((uint64_t *)code);
-		currentFirst64BitsOfCode = OSSwapInt64(currentFirst64BitsOfCode); // back to memory representation
-		currentFirst64BitsOfCode &= 0x0000000000FFFFFFLL; 
-		
-		// keep only last 3 instructions bytes, first 5 will be replaced by JMP instr
-		*newInstruction &= 0xFFFFFFFFFF000000LL; // clear last 3 bytes
-		*newInstruction |= (currentFirst64BitsOfCode & 0x0000000000FFFFFFLL); // set last 3 bytes
-	}
-
-	return allInstructionsKnown;
-}
-
-	static void
-fixupInstructions(
-    void		*originalFunction,
-    void		*escapeIsland,
-    void		*instructionsToFix,
-	int			instructionCount,
-	uint8_t		*instructionSizes )
-{
-	void *initialOriginalFunction = originalFunction;
-	int	index, fixed_size, code_size = 0;
-	for (index = 0;index < instructionCount;index += 1)
-		code_size += instructionSizes[index];
-
-#ifdef DEBUG_DISASM
-	void *initialInstructionsToFix = instructionsToFix;
-	fprintf(stderr, "BEFORE FIXING:\n");
-	dump16Bytes(initialOriginalFunction);
-	dump16Bytes(initialInstructionsToFix);
-#endif  // DEBUG_DISASM
-
-	for (index = 0;index < instructionCount;index += 1)
-	{
-                fixed_size = instructionSizes[index];
-		if ((*(uint8_t*)instructionsToFix == 0xE9) || // 32-bit jump relative
-		    (*(uint8_t*)instructionsToFix == 0xE8))   // 32-bit call relative
-		{
-			uint32_t offset = (uintptr_t)originalFunction - (uintptr_t)escapeIsland;
-			uint32_t *jumpOffsetPtr = (uint32_t*)((uintptr_t)instructionsToFix + 1);
-			*jumpOffsetPtr += offset;
-		}
-		if ((*(uint8_t*)instructionsToFix == 0x74) ||  // Near jump if equal (je), 2 bytes.
-		    (*(uint8_t*)instructionsToFix == 0x77))    // Near jump if above (ja), 2 bytes.
-		{
-			// We replace a near je/ja instruction, "7P JJ", with a 32-bit je/ja, "0F 8P WW XX YY ZZ".
-			// This is critical, otherwise a near jump will likely fall outside the original function.
-			uint32_t offset = (uintptr_t)initialOriginalFunction - (uintptr_t)escapeIsland;
-			uint32_t jumpOffset = *(uint8_t*)((uintptr_t)instructionsToFix + 1);
-			*((uint8_t*)instructionsToFix + 1) = *(uint8_t*)instructionsToFix + 0x10;
-			*(uint8_t*)instructionsToFix = 0x0F;
-			uint32_t *jumpOffsetPtr = (uint32_t*)((uintptr_t)instructionsToFix + 2 );
-			*jumpOffsetPtr = offset + jumpOffset;
-			fixed_size = 6;
-                }
-		
-		originalFunction = (void*)((uintptr_t)originalFunction + instructionSizes[index]);
-		escapeIsland = (void*)((uintptr_t)escapeIsland + instructionSizes[index]);
-		instructionsToFix = (void*)((uintptr_t)instructionsToFix + fixed_size);
-
-		// Expanding short instructions into longer ones may overwrite the next instructions,
-		// so we must restore them.
-		code_size -= fixed_size;
-		if ((code_size > 0) && (fixed_size != instructionSizes[index])) {
-			bcopy(originalFunction, instructionsToFix, code_size);
-		}
-	}
-#ifdef DEBUG_DISASM
-	fprintf(stderr, "AFTER_FIXING:\n");
-	dump16Bytes(initialOriginalFunction);
-	dump16Bytes(initialInstructionsToFix);
-#endif  // DEBUG_DISASM
-}
-
-#ifdef DEBUG_DISASM
-#define HEX_DIGIT(x) ((((x) % 16) < 10) ? ('0' + ((x) % 16)) : ('A' + ((x) % 16 - 10)))
-
-	static void
-dump16Bytes(
-	void 	*ptr) {
-	int i;
-	char buf[3];
-	uint8_t *bytes = (uint8_t*)ptr;
-	for (i = 0; i < 16; i++) {
-		buf[0] = HEX_DIGIT(bytes[i] / 16);
-		buf[1] = HEX_DIGIT(bytes[i] % 16);
-		buf[2] = ' ';
-		write(2, buf, 3);
-	}
-	write(2, "\n", 1);
-}
-#endif  // DEBUG_DISASM
-#endif
-
-#if defined(__i386__)
-__asm(
-			".text;"
-			".align 2, 0x90;"
-			"_atomic_mov64:;"
-			"	pushl %ebp;"
-			"	movl %esp, %ebp;"
-			"	pushl %esi;"
-			"	pushl %ebx;"
-			"	pushl %ecx;"
-			"	pushl %eax;"
-			"	pushl %edx;"
-	
-			// atomic push of value to an address
-			// we use cmpxchg8b, which compares content of an address with 
-			// edx:eax. If they are equal, it atomically puts 64bit value 
-			// ecx:ebx in address. 
-			// We thus put contents of address in edx:eax to force ecx:ebx
-			// in address
-			"	mov		8(%ebp), %esi;"  // esi contains target address
-			"	mov		12(%ebp), %ebx;"
-			"	mov		16(%ebp), %ecx;" // ecx:ebx now contains value to put in target address
-			"	mov		(%esi), %eax;"
-			"	mov		4(%esi), %edx;"  // edx:eax now contains value currently contained in target address
-			"	lock; cmpxchg8b	(%esi);" // atomic move.
-			
-			// restore registers
-			"	popl %edx;"
-			"	popl %eax;"
-			"	popl %ecx;"
-			"	popl %ebx;"
-			"	popl %esi;"
-			"	popl %ebp;"
-			"	ret"
-);
-#elif defined(__x86_64__)
-void atomic_mov64(
-		uint64_t *targetAddress,
-		uint64_t value )
-{
-    *targetAddress = value;
-}
-#endif
-#endif
-#endif  // __APPLE__
diff --git a/lib/interception/mach_override/mach_override.h b/lib/interception/mach_override/mach_override.h
deleted file mode 100644
index 7e60cdc..0000000
--- a/lib/interception/mach_override/mach_override.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*******************************************************************************
-	mach_override.h
-		Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com>
-		Some rights reserved: <http://opensource.org/licenses/mit-license.php>
-
-	***************************************************************************/
-
-/***************************************************************************//**
-	@mainpage	mach_override
-	@author		Jonathan 'Wolf' Rentzsch: <http://rentzsch.com>
-	
-	This package, coded in C to the Mach API, allows you to override ("patch")
-	program- and system-supplied functions at runtime. You can fully replace
-	functions with your implementations, or merely head- or tail-patch the
-	original implementations.
-	
-	Use it by #include'ing mach_override.h from your .c, .m or .mm file(s).
-	
-	@todo	Discontinue use of Carbon's MakeDataExecutable() and
-			CompareAndSwap() calls and start using the Mach equivalents, if they
-			exist. If they don't, write them and roll them in. That way, this
-			code will be pure Mach, which will make it easier to use everywhere.
-			Update: MakeDataExecutable() has been replaced by
-			msync(MS_INVALIDATE). There is an OSCompareAndSwap in libkern, but
-			I'm currently unsure if I can link against it. May have to roll in
-			my own version...
-	@todo	Stop using an entire 4K high-allocated VM page per 28-byte escape
-			branch island. Done right, this will dramatically speed up escape
-			island allocations when they number over 250. Then again, if you're
-			overriding more than 250 functions, maybe speed isn't your main
-			concern...
-	@todo	Add detection of: b, bl, bla, bc, bcl, bcla, bcctrl, bclrl
-			first-instructions. Initially, we should refuse to override
-			functions beginning with these instructions. Eventually, we should
-			dynamically rewrite them to make them position-independent.
-	@todo	Write mach_unoverride(), which would remove an override placed on a
-			function. Must be multiple-override aware, which means an almost
-			complete rewrite under the covers, because the target address can't
-			be spread across two load instructions like it is now since it will
-			need to be atomically updatable.
-	@todo	Add non-rentry variants of overrides to test_mach_override.
-
-	***************************************************************************/
-
-#ifdef __APPLE__
-
-#ifndef		_mach_override_
-#define		_mach_override_
-
-#include <sys/types.h>
-#include <mach/error.h>
-
-#ifdef	__cplusplus
-	extern	"C"	{
-#endif
-
-/**
-	Returned if the function to be overrided begins with a 'mfctr' instruction.
-*/
-#define	err_cannot_override	(err_local|1)
-
-/************************************************************************************//**
-	Dynamically overrides the function implementation referenced by
-	originalFunctionAddress with the implentation pointed to by overrideFunctionAddress.
-	Optionally returns a pointer to a "reentry island" which, if jumped to, will resume
-	the original implementation.
-	
-	@param	originalFunctionAddress			->	Required address of the function to
-												override (with overrideFunctionAddress).
-	@param	overrideFunctionAddress			->	Required address to the overriding
-												function.
-	@param	originalFunctionReentryIsland	<-	Optional pointer to pointer to the
-												reentry island. Can be NULL.
-	@result									<-	err_cannot_override if the original
-												function's implementation begins with
-												the 'mfctr' instruction.
-
-	************************************************************************************/
-
-// We're prefixing mach_override_ptr() with "__asan_" to avoid name conflicts with other
-// mach_override_ptr() implementations that may appear in the client program.
-    mach_error_t
-__asan_mach_override_ptr(
-	void *originalFunctionAddress,
-    const void *overrideFunctionAddress,
-    void **originalFunctionReentryIsland );
-
-// Allow to use custom allocation and deallocation routines with mach_override_ptr().
-// This should help to speed up the things on x86_64.
-typedef mach_error_t island_malloc( void **ptr, size_t size, void *hint );
-typedef mach_error_t island_free( void *ptr );
-
-    mach_error_t
-__asan_mach_override_ptr_custom(
-	void *originalFunctionAddress,
-    const void *overrideFunctionAddress,
-    void **originalFunctionReentryIsland,
-    island_malloc *alloc,
-    island_free *dealloc );
-
-/************************************************************************************//**
-	
-
-	************************************************************************************/
- 
-#ifdef	__cplusplus
-
-#define MACH_OVERRIDE( ORIGINAL_FUNCTION_RETURN_TYPE, ORIGINAL_FUNCTION_NAME, ORIGINAL_FUNCTION_ARGS, ERR )			\
-	{																												\
-		static ORIGINAL_FUNCTION_RETURN_TYPE (*ORIGINAL_FUNCTION_NAME##_reenter)ORIGINAL_FUNCTION_ARGS;				\
-		static bool ORIGINAL_FUNCTION_NAME##_overriden = false;														\
-		class mach_override_class__##ORIGINAL_FUNCTION_NAME {														\
-		public:																										\
-			static kern_return_t override(void *originalFunctionPtr) {												\
-				kern_return_t result = err_none;																	\
-				if (!ORIGINAL_FUNCTION_NAME##_overriden) {															\
-					ORIGINAL_FUNCTION_NAME##_overriden = true;														\
-					result = mach_override_ptr( (void*)originalFunctionPtr,											\
-												(void*)mach_override_class__##ORIGINAL_FUNCTION_NAME::replacement,	\
-												(void**)&ORIGINAL_FUNCTION_NAME##_reenter );						\
-				}																									\
-				return result;																						\
-			}																										\
-			static ORIGINAL_FUNCTION_RETURN_TYPE replacement ORIGINAL_FUNCTION_ARGS {
-
-#define END_MACH_OVERRIDE( ORIGINAL_FUNCTION_NAME )																	\
-			}																										\
-		};																											\
-																													\
-		err = mach_override_class__##ORIGINAL_FUNCTION_NAME::override((void*)ORIGINAL_FUNCTION_NAME);				\
-	}
- 
-#endif
-
-#ifdef	__cplusplus
-	}
-#endif
-#endif	//	_mach_override_
-
-#endif  // __APPLE__
diff --git a/lib/lit.common.unit.cfg b/lib/lit.common.unit.cfg
index 8250b4a..ca00abb 100644
--- a/lib/lit.common.unit.cfg
+++ b/lib/lit.common.unit.cfg
@@ -7,8 +7,8 @@
 import os
 
 # Setup test format
-build_type = getattr(config, "build_type", "Debug")
-config.test_format = lit.formats.GoogleTest(build_type, "Test")
+llvm_build_mode = getattr(config, "llvm_build_mode", "Debug")
+config.test_format = lit.formats.GoogleTest(llvm_build_mode, "Test")
 
 # Setup test suffixes.
 config.suffixes = []
diff --git a/lib/msan/CMakeLists.txt b/lib/msan/CMakeLists.txt
new file mode 100644
index 0000000..fa057a6
--- /dev/null
+++ b/lib/msan/CMakeLists.txt
@@ -0,0 +1,34 @@
+include_directories(..)
+
+# Runtime library sources and build flags.
+set(MSAN_RTL_SOURCES
+  msan.cc
+  msan_allocator.cc
+  msan_interceptors.cc
+  msan_linux.cc
+  msan_new_delete.cc
+  msan_report.cc
+  )
+set(MSAN_RTL_CFLAGS
+  ${SANITIZER_COMMON_CFLAGS}
+  -fPIE
+  # Prevent clang from generating libc calls.
+  -ffreestanding)
+
+# Static runtime library.
+set(MSAN_RUNTIME_LIBRARIES)
+set(arch "x86_64")
+if(CAN_TARGET_${arch})
+  add_compiler_rt_static_runtime(clang_rt.msan-${arch} ${arch}
+    SOURCES ${MSAN_RTL_SOURCES}
+            $<TARGET_OBJECTS:RTInterception.${arch}>
+            $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+    CFLAGS ${MSAN_RTL_CFLAGS})
+  list(APPEND MSAN_RUNTIME_LIBRARIES clang_rt.msan-${arch})
+endif()
+
+if(LLVM_INCLUDE_TESTS)
+  add_subdirectory(tests)
+endif()
+
+add_subdirectory(lit_tests)
diff --git a/lib/msan/Makefile.mk b/lib/msan/Makefile.mk
new file mode 100644
index 0000000..99e3b03
--- /dev/null
+++ b/lib/msan/Makefile.mk
@@ -0,0 +1,24 @@
+#===- lib/msan/Makefile.mk ---------------------------------*- Makefile -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := msan
+SubDirs :=
+
+Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
+ObjNames := $(Sources:%.cc=%.o)
+
+Implementation := Generic
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard $(Dir)/*.h)
+Dependencies += $(wildcard $(Dir)/../interception/*.h)
+Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)
+
+# Define a convenience variable for all the msan functions.
+MsanFunctions := $(Sources:%.cc=%)
diff --git a/lib/msan/lit_tests/CMakeLists.txt b/lib/msan/lit_tests/CMakeLists.txt
new file mode 100644
index 0000000..62b2101
--- /dev/null
+++ b/lib/msan/lit_tests/CMakeLists.txt
@@ -0,0 +1,32 @@
+set(MSAN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
+set(MSAN_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/..)
+
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+  )
+
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
+  ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+  )
+
+if(COMPILER_RT_CAN_EXECUTE_TESTS)
+  # Run MSan tests only if we're sure we may produce working binaries.
+  set(MSAN_TEST_DEPS
+    clang clang-headers FileCheck count not llvm-nm llvm-symbolizer
+    ${MSAN_RUNTIME_LIBRARIES}
+    )
+  set(MSAN_TEST_PARAMS
+    msan_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+    )
+  if(LLVM_INCLUDE_TESTS)
+    list(APPEND MSAN_TEST_DEPS MsanUnitTests)
+  endif()
+  add_lit_testsuite(check-msan "Running the MemorySanitizer tests"
+    ${CMAKE_CURRENT_BINARY_DIR}
+    PARAMS ${MSAN_TEST_PARAMS}
+    DEPENDS ${MSAN_TEST_DEPS}
+    )
+  set_target_properties(check-msan PROPERTIES FOLDER "MSan tests")
+endif()
diff --git a/lib/msan/lit_tests/Unit/lit.cfg b/lib/msan/lit_tests/Unit/lit.cfg
new file mode 100644
index 0000000..afb30e0
--- /dev/null
+++ b/lib/msan/lit_tests/Unit/lit.cfg
@@ -0,0 +1,27 @@
+# -*- Python -*-
+
+import os
+
+def get_required_attr(config, attr_name):
+  attr_value = getattr(config, attr_name, None)
+  if not attr_value:
+    lit.fatal("No attribute %r in test configuration! You may need to run "
+              "tests from your build directory or add this attribute "
+              "to lit.site.cfg " % attr_name)
+  return attr_value
+
+# Setup attributes common for all compiler-rt projects.
+llvm_src_root = get_required_attr(config, 'llvm_src_root')
+compiler_rt_lit_unit_cfg = os.path.join(llvm_src_root, "projects",
+                                        "compiler-rt", "lib",
+                                        "lit.common.unit.cfg")
+lit.load_config(config, compiler_rt_lit_unit_cfg)
+
+# Setup config name.
+config.name = 'MemorySanitizer-Unit'
+
+# Setup test source and exec root. For unit tests, we define
+# it as build directory with MSan unit tests.
+msan_binary_dir = get_required_attr(config, "msan_binary_dir")
+config.test_exec_root = os.path.join(msan_binary_dir, "tests")
+config.test_source_root = config.test_exec_root
diff --git a/lib/msan/lit_tests/Unit/lit.site.cfg.in b/lib/msan/lit_tests/Unit/lit.site.cfg.in
new file mode 100644
index 0000000..4ae84c4
--- /dev/null
+++ b/lib/msan/lit_tests/Unit/lit.site.cfg.in
@@ -0,0 +1,16 @@
+## Autogenerated by LLVM/Clang configuration.
+# Do not edit!
+
+config.target_triple = "@TARGET_TRIPLE@"
+config.llvm_src_root = "@LLVM_SOURCE_DIR@"
+config.llvm_build_mode = "@LLVM_BUILD_MODE@"
+config.msan_binary_dir = "@MSAN_BINARY_DIR@"
+
+try:
+  config.llvm_build_mode = config.llvm_build_mode % lit.params
+except KeyError,e:
+  key, = e.args
+  lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key, key))
+
+# Let the main config do the real work.
+lit.load_config(config, "@MSAN_SOURCE_DIR@/lit_tests/Unit/lit.cfg")
diff --git a/lib/msan/lit_tests/heap-origin.cc b/lib/msan/lit_tests/heap-origin.cc
new file mode 100644
index 0000000..54e2c31
--- /dev/null
+++ b/lib/msan/lit_tests/heap-origin.cc
@@ -0,0 +1,33 @@
+// RUN: %clangxx_msan -m64 -O0 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_msan -m64 -O1 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_msan -m64 -O2 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_msan -m64 -O3 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O0 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O1 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O2 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O3 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
+
+#include <stdlib.h>
+int main(int argc, char **argv) {
+  char *volatile x = (char*)malloc(5 * sizeof(char));
+  if (*x)
+    exit(0);
+  // CHECK: WARNING: Use of uninitialized value
+  // CHECK: {{#0 0x.* in main .*heap-origin.cc:}}[[@LINE-3]]
+
+  // CHECK-ORIGINS: Uninitialized value was created by a heap allocation
+  // CHECK-ORIGINS: {{#0 0x.* in .*malloc}}
+  // CHECK-ORIGINS: {{#1 0x.* in main .*heap-origin.cc:}}[[@LINE-8]]
+
+  // CHECK: SUMMARY: MemorySanitizer: use-of-uninitialized-value {{.*heap-origin.cc:.* main}}
+  return 0;
+}
diff --git a/lib/msan/lit_tests/lit.cfg b/lib/msan/lit_tests/lit.cfg
new file mode 100644
index 0000000..9429050
--- /dev/null
+++ b/lib/msan/lit_tests/lit.cfg
@@ -0,0 +1,83 @@
+# -*- Python -*-
+
+import os
+
+# Setup config name.
+config.name = 'MemorySanitizer'
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+def DisplayNoConfigMessage():
+  lit.fatal("No site specific configuration available! " +
+            "Try running your test from the build tree or running " +
+            "make check-msan")
+
+# Figure out LLVM source root.
+llvm_src_root = getattr(config, 'llvm_src_root', None)
+if llvm_src_root is None:
+  # We probably haven't loaded the site-specific configuration: the user
+  # is likely trying to run a test file directly, and the site configuration
+  # wasn't created by the build system.
+  msan_site_cfg = lit.params.get('msan_site_config', None)
+  if (msan_site_cfg) and (os.path.exists(msan_site_cfg)):
+    lit.load_config(config, msan_site_cfg)
+    raise SystemExit
+
+  # Try to guess the location of site-specific configuration using llvm-config
+  # util that can point where the build tree is.
+  llvm_config = lit.util.which("llvm-config", config.environment["PATH"])
+  if not llvm_config:
+    DisplayNoConfigMessage()
+
+  # Validate that llvm-config points to the same source tree.
+  llvm_src_root = lit.util.capture(["llvm-config", "--src-root"]).strip()
+  msan_test_src_root = os.path.join(llvm_src_root, "projects", "compiler-rt",
+                                    "lib", "msan", "lit_tests")
+  if (os.path.realpath(msan_test_src_root) !=
+      os.path.realpath(config.test_source_root)):
+    DisplayNoConfigMessage()
+
+  # Find out the presumed location of generated site config.
+  llvm_obj_root = lit.util.capture(["llvm-config", "--obj-root"]).strip()
+  msan_site_cfg = os.path.join(llvm_obj_root, "projects", "compiler-rt",
+                               "lib", "msan", "lit_tests", "lit.site.cfg")
+  if (not msan_site_cfg) or (not os.path.exists(msan_site_cfg)):
+    DisplayNoConfigMessage()
+
+  lit.load_config(config, msan_site_cfg)
+  raise SystemExit
+
+# Setup attributes common for all compiler-rt projects.
+compiler_rt_lit_cfg = os.path.join(llvm_src_root, "projects", "compiler-rt",
+                                   "lib", "lit.common.cfg")
+if (not compiler_rt_lit_cfg) or (not os.path.exists(compiler_rt_lit_cfg)):
+  lit.fatal("Can't find common compiler-rt lit config at: %r"
+            % compiler_rt_lit_cfg)
+lit.load_config(config, compiler_rt_lit_cfg)
+
+# Setup default compiler flags used with -fsanitize=memory option.
+clang_msan_cxxflags = ["-ccc-cxx ",
+                       "-fsanitize=memory",
+                       "-mno-omit-leaf-frame-pointer",
+                       "-fno-omit-frame-pointer",
+                       "-fno-optimize-sibling-calls",
+                       "-g",
+                       "-fPIE",
+                       "-pie"]
+config.substitutions.append( ("%clangxx_msan ",
+                              " ".join([config.clang] + clang_msan_cxxflags) + 
+                              " ") )
+
+# Setup path to external LLVM symbolizer to run MemorySanitizer output tests.
+llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
+if llvm_tools_dir:
+  llvm_symbolizer_path = os.path.join(llvm_tools_dir, "llvm-symbolizer")
+  config.environment['MSAN_SYMBOLIZER_PATH'] = llvm_symbolizer_path
+
+# Default test suffixes.
+config.suffixes = ['.c', '.cc', '.cpp']
+
+# MemorySanitizer tests are currently supported on Linux only.
+if config.host_os not in ['Linux']:
+  config.unsupported = True
diff --git a/lib/msan/lit_tests/lit.site.cfg.in b/lib/msan/lit_tests/lit.site.cfg.in
new file mode 100644
index 0000000..cc7c7a0
--- /dev/null
+++ b/lib/msan/lit_tests/lit.site.cfg.in
@@ -0,0 +1,17 @@
+config.target_triple = "@TARGET_TRIPLE@"
+config.host_os = "@HOST_OS@"
+config.llvm_src_root = "@LLVM_SOURCE_DIR@"
+config.llvm_obj_root = "@LLVM_BINARY_DIR@"
+config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
+config.clang = "@LLVM_BINARY_DIR@/bin/clang"
+
+# LLVM tools dir can be passed in lit parameters, so try to
+# apply substitution.
+try:
+  config.llvm_tools_dir = config.llvm_tools_dir % lit.params
+except KeyError,e:
+  key, = e.args
+  lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key, key))
+
+# Let the main config do the real work.
+lit.load_config(config, "@MSAN_SOURCE_DIR@/lit_tests/lit.cfg")
diff --git a/lib/msan/lit_tests/no_sanitize_memory.cc b/lib/msan/lit_tests/no_sanitize_memory.cc
new file mode 100644
index 0000000..48afc17
--- /dev/null
+++ b/lib/msan/lit_tests/no_sanitize_memory.cc
@@ -0,0 +1,34 @@
+// RUN: %clangxx_msan -m64 -O0 %s -o %t && %t >%t.out 2>&1
+// RUN: %clangxx_msan -m64 -O1 %s -o %t && %t >%t.out 2>&1
+// RUN: %clangxx_msan -m64 -O2 %s -o %t && %t >%t.out 2>&1
+// RUN: %clangxx_msan -m64 -O3 %s -o %t && %t >%t.out 2>&1
+
+// RUN: %clangxx_msan -m64 -O0 %s -o %t -DCHECK_IN_F && %t >%t.out 2>&1
+// RUN: %clangxx_msan -m64 -O1 %s -o %t -DCHECK_IN_F && %t >%t.out 2>&1
+// RUN: %clangxx_msan -m64 -O2 %s -o %t -DCHECK_IN_F && %t >%t.out 2>&1
+// RUN: %clangxx_msan -m64 -O3 %s -o %t -DCHECK_IN_F && %t >%t.out 2>&1
+
+// Test that (no_sanitize_memory) functions
+// * don't check shadow values (-DCHECK_IN_F)
+// * treat all values loaded from memory as fully initialized (-UCHECK_IN_F)
+
+#include <stdlib.h>
+#include <stdio.h>
+
+__attribute__((noinline))
+__attribute__((no_sanitize_memory))
+int f(void) {
+  int x;
+  int * volatile p = &x;
+#ifdef CHECK_IN_F
+  if (*p)
+    exit(0);
+#endif
+  return *p;
+}
+
+int main(void) {
+  if (f())
+    exit(0);
+  return 0;
+}
diff --git a/lib/msan/lit_tests/no_sanitize_memory_prop.cc b/lib/msan/lit_tests/no_sanitize_memory_prop.cc
new file mode 100644
index 0000000..c74ca6b
--- /dev/null
+++ b/lib/msan/lit_tests/no_sanitize_memory_prop.cc
@@ -0,0 +1,33 @@
+// RUN: %clangxx_msan -m64 -O0 %s -o %t && %t >%t.out 2>&1
+// RUN: %clangxx_msan -m64 -O1 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_msan -m64 -O2 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_msan -m64 -O3 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// Test that (no_sanitize_memory) functions propagate shadow.
+
+// Note that at -O0 there is no report, because 'x' in 'f' is spilled to the
+// stack, and then loaded back as a fully initialized value (due to
+// no_sanitize_memory attribute).
+
+#include <stdlib.h>
+#include <stdio.h>
+
+__attribute__((noinline))
+__attribute__((no_sanitize_memory))
+int f(int x) {
+  return x;
+}
+
+int main(void) {
+  int x;
+  int * volatile p = &x;
+  int y = f(*p);
+  // CHECK: WARNING: Use of uninitialized value
+  // CHECK: {{#0 0x.* in main .*no_sanitize_memory_prop.cc:}}[[@LINE+1]]
+  if (y)
+    exit(0);
+  return 0;
+}
diff --git a/lib/msan/lit_tests/stack-origin.cc b/lib/msan/lit_tests/stack-origin.cc
new file mode 100644
index 0000000..90f5273
--- /dev/null
+++ b/lib/msan/lit_tests/stack-origin.cc
@@ -0,0 +1,32 @@
+// RUN: %clangxx_msan -m64 -O0 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_msan -m64 -O1 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_msan -m64 -O2 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_msan -m64 -O3 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O0 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O1 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O2 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O3 %s -o %t && not %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
+
+#include <stdlib.h>
+int main(int argc, char **argv) {
+  int x;
+  int *volatile p = &x;
+  if (*p)
+    exit(0);
+  // CHECK: WARNING: Use of uninitialized value
+  // CHECK: {{#0 0x.* in main .*stack-origin.cc:}}[[@LINE-3]]
+
+  // CHECK-ORIGINS: Uninitialized value was created by an allocation of 'x' in the stack frame of function 'main'
+
+  // CHECK: SUMMARY: MemorySanitizer: use-of-uninitialized-value {{.*stack-origin.cc:.* main}}
+  return 0;
+}
diff --git a/lib/msan/msan.cc b/lib/msan/msan.cc
new file mode 100644
index 0000000..96f99d4
--- /dev/null
+++ b/lib/msan/msan.cc
@@ -0,0 +1,454 @@
+//===-- msan.cc -----------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer runtime.
+//===----------------------------------------------------------------------===//
+
+#include "msan.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
+
+#include "interception/interception.h"
+
+// ACHTUNG! No system header includes in this file.
+
+using namespace __sanitizer;
+
+// Globals.
+static THREADLOCAL int msan_expect_umr = 0;
+static THREADLOCAL int msan_expected_umr_found = 0;
+
+static int msan_running_under_dr = 0;
+
+SANITIZER_INTERFACE_ATTRIBUTE
+THREADLOCAL u64 __msan_param_tls[kMsanParamTlsSizeInWords];
+
+SANITIZER_INTERFACE_ATTRIBUTE
+THREADLOCAL u32 __msan_param_origin_tls[kMsanParamTlsSizeInWords];
+
+SANITIZER_INTERFACE_ATTRIBUTE
+THREADLOCAL u64 __msan_retval_tls[kMsanRetvalTlsSizeInWords];
+
+SANITIZER_INTERFACE_ATTRIBUTE
+THREADLOCAL u32 __msan_retval_origin_tls;
+
+SANITIZER_INTERFACE_ATTRIBUTE
+THREADLOCAL u64 __msan_va_arg_tls[kMsanParamTlsSizeInWords];
+
+SANITIZER_INTERFACE_ATTRIBUTE
+THREADLOCAL u64 __msan_va_arg_overflow_size_tls;
+
+SANITIZER_INTERFACE_ATTRIBUTE
+THREADLOCAL u32 __msan_origin_tls;
+
+static THREADLOCAL struct {
+  uptr stack_top, stack_bottom;
+} __msan_stack_bounds;
+
+static THREADLOCAL bool is_in_symbolizer;
+static THREADLOCAL bool is_in_loader;
+
+extern "C" const int __msan_track_origins;
+int __msan_get_track_origins() {
+  return __msan_track_origins;
+}
+
+namespace __msan {
+
+static bool IsRunningUnderDr() {
+  bool result = false;
+  MemoryMappingLayout proc_maps;
+  const sptr kBufSize = 4095;
+  char *filename = (char*)MmapOrDie(kBufSize, __FUNCTION__);
+  while (proc_maps.Next(/* start */0, /* end */0, /* file_offset */0,
+                        filename, kBufSize)) {
+    if (internal_strstr(filename, "libdynamorio") != 0) {
+      result = true;
+      break;
+    }
+  }
+  UnmapOrDie(filename, kBufSize);
+  return result;
+}
+
+void EnterSymbolizer() { is_in_symbolizer = true; }
+void ExitSymbolizer()  { is_in_symbolizer = false; }
+bool IsInSymbolizer() { return is_in_symbolizer; }
+
+void EnterLoader() { is_in_loader = true; }
+void ExitLoader()  { is_in_loader = false; }
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader() { return is_in_loader; }
+}
+
+static Flags msan_flags;
+
+Flags *flags() {
+  return &msan_flags;
+}
+
+int msan_inited = 0;
+bool msan_init_is_running;
+
+int msan_report_count = 0;
+
+// Array of stack origins.
+// FIXME: make it resizable.
+static const uptr kNumStackOriginDescrs = 1024 * 1024;
+static const char *StackOriginDescr[kNumStackOriginDescrs];
+static atomic_uint32_t NumStackOriginDescrs;
+
+static void ParseFlagsFromString(Flags *f, const char *str) {
+  ParseFlag(str, &f->poison_heap_with_zeroes, "poison_heap_with_zeroes");
+  ParseFlag(str, &f->poison_stack_with_zeroes, "poison_stack_with_zeroes");
+  ParseFlag(str, &f->poison_in_malloc, "poison_in_malloc");
+  ParseFlag(str, &f->exit_code, "exit_code");
+  if (f->exit_code < 0 || f->exit_code > 127) {
+    Printf("Exit code not in [0, 128) range: %d\n", f->exit_code);
+    f->exit_code = 1;
+    Die();
+  }
+  ParseFlag(str, &f->num_callers, "num_callers");
+  ParseFlag(str, &f->report_umrs, "report_umrs");
+  ParseFlag(str, &f->verbosity, "verbosity");
+  ParseFlag(str, &f->strip_path_prefix, "strip_path_prefix");
+}
+
+static void InitializeFlags(Flags *f, const char *options) {
+  internal_memset(f, 0, sizeof(*f));
+
+  f->poison_heap_with_zeroes = false;
+  f->poison_stack_with_zeroes = false;
+  f->poison_in_malloc = true;
+  f->exit_code = 77;
+  f->num_callers = 20;
+  f->report_umrs = true;
+  f->verbosity = 0;
+  f->strip_path_prefix = "";
+
+  // Override from user-specified string.
+  if (__msan_default_options)
+    ParseFlagsFromString(f, __msan_default_options());
+  ParseFlagsFromString(f, options);
+}
+
+static void GetCurrentStackBounds(uptr *stack_top, uptr *stack_bottom) {
+  if (__msan_stack_bounds.stack_top == 0) {
+    // Break recursion (GetStackTrace -> GetThreadStackTopAndBottom ->
+    // realloc -> GetStackTrace).
+    __msan_stack_bounds.stack_top = __msan_stack_bounds.stack_bottom = 1;
+    GetThreadStackTopAndBottom(/* at_initialization */false,
+                               &__msan_stack_bounds.stack_top,
+                               &__msan_stack_bounds.stack_bottom);
+  }
+  *stack_top = __msan_stack_bounds.stack_top;
+  *stack_bottom = __msan_stack_bounds.stack_bottom;
+}
+
+void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp,
+                   bool fast) {
+  if (!fast) {
+    // Block reports from our interceptors during _Unwind_Backtrace.
+    SymbolizerScope sym_scope;
+    return stack->SlowUnwindStack(pc, max_s);
+  }
+
+  uptr stack_top, stack_bottom;
+  GetCurrentStackBounds(&stack_top, &stack_bottom);
+  stack->size = 0;
+  stack->trace[0] = pc;
+  stack->max_size = max_s;
+  stack->FastUnwindStack(pc, bp, stack_top, stack_bottom);
+}
+
+void PrintWarning(uptr pc, uptr bp) {
+  PrintWarningWithOrigin(pc, bp, __msan_origin_tls);
+}
+
+bool OriginIsValid(u32 origin) {
+  return origin != 0 && origin != (u32)-1;
+}
+
+void PrintWarningWithOrigin(uptr pc, uptr bp, u32 origin) {
+  if (msan_expect_umr) {
+    // Printf("Expected UMR\n");
+    __msan_origin_tls = origin;
+    msan_expected_umr_found = 1;
+    return;
+  }
+
+  ++msan_report_count;
+
+  StackTrace stack;
+  GetStackTrace(&stack, kStackTraceMax, pc, bp, /*fast*/false);
+
+  u32 report_origin =
+    (__msan_track_origins && OriginIsValid(origin)) ? origin : 0;
+  ReportUMR(&stack, report_origin);
+
+  if (__msan_track_origins && !OriginIsValid(origin)) {
+    Printf("  ORIGIN: invalid (%x). Might be a bug in MemorySanitizer, "
+           "please report to MemorySanitizer developers.\n",
+           origin);
+  }
+}
+
+}  // namespace __msan
+
+// Interface.
+
+using namespace __msan;
+
+void __msan_warning() {
+  GET_CALLER_PC_BP_SP;
+  (void)sp;
+  PrintWarning(pc, bp);
+}
+
+void __msan_warning_noreturn() {
+  GET_CALLER_PC_BP_SP;
+  (void)sp;
+  PrintWarning(pc, bp);
+  Printf("Exiting\n");
+  Die();
+}
+
+void __msan_init() {
+  if (msan_inited) return;
+  msan_init_is_running = 1;
+  SanitizerToolName = "MemorySanitizer";
+
+  InstallAtExitHandler();
+  SetDieCallback(MsanDie);
+  InitializeInterceptors();
+
+  ReplaceOperatorsNewAndDelete();
+  const char *msan_options = GetEnv("MSAN_OPTIONS");
+  InitializeFlags(&msan_flags, msan_options);
+  if (StackSizeIsUnlimited()) {
+    if (flags()->verbosity)
+      Printf("Unlimited stack, doing reexec\n");
+    // A reasonably large stack size. It is bigger than the usual 8MB, because,
+    // well, the program could have been run with unlimited stack for a reason.
+    SetStackSizeLimitInBytes(32 * 1024 * 1024);
+    ReExec();
+  }
+
+  if (flags()->verbosity)
+    Printf("MSAN_OPTIONS: %s\n", msan_options ? msan_options : "<empty>");
+
+  msan_running_under_dr = IsRunningUnderDr();
+  __msan_clear_on_return();
+  if (__msan_track_origins && flags()->verbosity > 0)
+    Printf("msan_track_origins\n");
+  if (!InitShadow(/* prot1 */false, /* prot2 */true, /* map_shadow */true,
+                  __msan_track_origins)) {
+    // FIXME: prot1 = false is only required when running under DR.
+    Printf("FATAL: MemorySanitizer can not mmap the shadow memory.\n");
+    Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
+    Printf("FATAL: Disabling ASLR is known to cause this error.\n");
+    Printf("FATAL: If running under GDB, try "
+           "'set disable-randomization off'.\n");
+    DumpProcessMap();
+    Die();
+  }
+
+  const char *external_symbolizer = GetEnv("MSAN_SYMBOLIZER_PATH");
+  if (external_symbolizer && external_symbolizer[0]) {
+    CHECK(InitializeExternalSymbolizer(external_symbolizer));
+  }
+
+  GetThreadStackTopAndBottom(/* at_initialization */true,
+                             &__msan_stack_bounds.stack_top,
+                             &__msan_stack_bounds.stack_bottom);
+  if (flags()->verbosity)
+    Printf("MemorySanitizer init done\n");
+  msan_init_is_running = 0;
+  msan_inited = 1;
+}
+
+void __msan_set_exit_code(int exit_code) {
+  flags()->exit_code = exit_code;
+}
+
+void __msan_set_expect_umr(int expect_umr) {
+  if (expect_umr) {
+    msan_expected_umr_found = 0;
+  } else if (!msan_expected_umr_found) {
+    GET_CALLER_PC_BP_SP;
+    (void)sp;
+    StackTrace stack;
+    GetStackTrace(&stack, kStackTraceMax, pc, bp, /*fast*/false);
+    ReportExpectedUMRNotFound(&stack);
+    Die();
+  }
+  msan_expect_umr = expect_umr;
+}
+
+void __msan_print_shadow(const void *x, uptr size) {
+  // Dump the shadow of [x, x+size) as two hex digits per byte; when origin
+  // tracking is on, follow with size/4 origin words.
+  const unsigned char *shadow = (unsigned char*)MEM_TO_SHADOW(x);
+  for (uptr i = 0; i != size; ++i)
+    Printf("%x%x ", shadow[i] >> 4, shadow[i] & 0xf);
+  Printf("\n");
+  if (!__msan_track_origins)
+    return;
+  const u32 *origins = (u32*)MEM_TO_ORIGIN(x);
+  for (uptr i = 0; i != size / 4; ++i)
+    Printf(" o: %x ", origins[i]);
+  Printf("\n");
+}
+
+void __msan_print_param_shadow() {
+  for (int i = 0; i < 16; i++) {
+    Printf("#%d:%zx ", i, __msan_param_tls[i]);
+  }
+  Printf("\n");
+}
+
+sptr __msan_test_shadow(const void *x, uptr size) {
+  // Offset of the first byte in [x, x+size) with non-zero shadow, or -1.
+  const unsigned char *shadow = (unsigned char*)MEM_TO_SHADOW((uptr)x);
+  uptr pos = 0;
+  while (pos < size && !shadow[pos]) ++pos;
+  return pos < size ? (sptr)pos : -1;
+}
+
+int __msan_set_poison_in_malloc(int do_poison) {
+  int old = flags()->poison_in_malloc;
+  flags()->poison_in_malloc = do_poison;
+  return old;
+}
+
+int  __msan_has_dynamic_component() {
+  return msan_running_under_dr;
+}
+
+NOINLINE
+void __msan_clear_on_return() {
+  __msan_param_tls[0] = 0;
+}
+
+static void* get_tls_base() {
+  u64 p;
+  asm("mov %%fs:0, %0"
+      : "=r"(p) ::);
+  return (void*)p;
+}
+
+int __msan_get_retval_tls_offset() {
+  // volatile here is needed to avoid UB, because the compiler thinks that we
+  // are doing address arithmetics on unrelated pointers, and takes some
+  // shortcuts
+  volatile sptr retval_tls_p = (sptr)&__msan_retval_tls;
+  volatile sptr tls_base_p = (sptr)get_tls_base();
+  return retval_tls_p - tls_base_p;
+}
+
+int __msan_get_param_tls_offset() {
+  // volatile here is needed to avoid UB, because the compiler thinks that we
+  // are doing address arithmetics on unrelated pointers, and takes some
+  // shortcuts
+  volatile sptr param_tls_p = (sptr)&__msan_param_tls;
+  volatile sptr tls_base_p = (sptr)get_tls_base();
+  return param_tls_p - tls_base_p;
+}
+
+void __msan_partial_poison(void* data, void* shadow, uptr size) {
+  internal_memcpy((void*)MEM_TO_SHADOW((uptr)data), shadow, size);
+}
+
+void __msan_load_unpoisoned(void *src, uptr size, void *dst) {
+  internal_memcpy(dst, src, size);
+  __msan_unpoison(dst, size);
+}
+
+void __msan_set_origin(void *a, uptr size, u32 origin) {
+  // Origin mapping is 4 bytes per 4 bytes of application memory.
+  // Here we extend the range such that its left and right bounds are both
+  // 4 byte aligned. An empty range must not touch any origin slot.
+  if (!__msan_track_origins || !size) return;
+  uptr x = MEM_TO_ORIGIN((uptr)a);
+  uptr beg = x & ~3UL;  // align down.
+  uptr end = (x + size + 3) & ~3UL;  // align up.
+  u64 origin64 = ((u64)origin << 32) | origin;
+  // This is like memset, but the value is 32-bit. We unroll by 2 to write
+  // 64 bits at once. May want to unroll further to get 128-bit stores.
+  if (beg & 7ULL) {  // leading slot that is not 8-byte aligned
+    *(u32*)beg = origin;
+    beg += 4;
+  }
+  for (uptr addr = beg; addr < (end & ~7UL); addr += 8)
+    *(u64*)addr = origin64;
+  if (end & 7ULL)  // trailing slot left over after the 8-byte stores
+    *(u32*)(end - 4) = origin;
+}
+
+// 'descr' is created at compile time and contains '----' in the beginning.
+// When we see descr for the first time we replace '----' with a unique id
+// and set the origin to (id | (31-th bit)).
+void __msan_set_alloca_origin(void *a, uptr size, const char *descr) {
+  static const u32 dash = '-';
+  static const u32 first_timer =
+      dash + (dash << 8) + (dash << 16) + (dash << 24);  // "----" as one u32
+  u32 *id_ptr = (u32*)descr;  // the first 4 bytes of descr double as the id
+  bool print = false;  // internal_strstr(descr + 4, "AllocaTOTest") != 0;
+  u32 id = *id_ptr;
+  if (id == first_timer) {  // still "----": no id has been stored yet
+    id = atomic_fetch_add(&NumStackOriginDescrs,
+                          1, memory_order_relaxed);
+    *id_ptr = id;  // written through the const pointer passed by the caller
+    CHECK_LT(id, kNumStackOriginDescrs);
+    StackOriginDescr[id] = descr + 4;  // text that follows the 4-byte slot
+    if (print)
+      Printf("First time: id=%d %s \n", id, descr + 4);
+  }
+  id |= 1U << 31;  // bit 31 is tested by __msan_get_origin_descr_if_stack
+  if (print)
+    Printf("__msan_set_alloca_origin: descr=%s id=%x\n", descr + 4, id);
+  __msan_set_origin(a, size, id);
+}
+
+const char *__msan_get_origin_descr_if_stack(u32 id) {
+  if ((id >> 31) == 0) return 0;
+  id &= (1U << 31) - 1;
+  CHECK_LT(id, kNumStackOriginDescrs);
+  return StackOriginDescr[id];
+}
+
+
+u32 __msan_get_origin(void *a) {
+  // The origin slot is looked up for the 4-byte-aligned word containing a.
+  if (__msan_track_origins) {
+    const uptr word = (uptr)a & ~3ULL;
+    return *(u32*)MEM_TO_ORIGIN(word);
+  }
+  return 0;
+}
+
+u32 __msan_get_umr_origin() {
+  return __msan_origin_tls;
+}
+
+#if !SANITIZER_SUPPORTS_WEAK_HOOKS
+extern "C" {
+SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
+const char* __msan_default_options() { return ""; }
+}  // extern "C"
+#endif
+
diff --git a/lib/msan/msan.h b/lib/msan/msan.h
new file mode 100644
index 0000000..123dd36
--- /dev/null
+++ b/lib/msan/msan.h
@@ -0,0 +1,86 @@
+//===-- msan.h --------------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// Private MSan header.
+//===----------------------------------------------------------------------===//
+
+#ifndef MSAN_H
+#define MSAN_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "msan_interface_internal.h"
+#include "msan_flags.h"
+
+#define MEM_TO_SHADOW(mem) (((uptr)(mem))     & ~0x400000000000ULL)
+#define MEM_TO_ORIGIN(mem) (MEM_TO_SHADOW(mem) + 0x200000000000ULL)
+#define MEM_IS_APP(mem)    ((uptr)(mem) >=       0x600000000000ULL)
+#define MEM_IS_SHADOW(mem) ((uptr)(mem) >=       0x200000000000ULL && \
+                            (uptr)(mem) <=       0x400000000000ULL)
+
+struct link_map;  // Opaque type returned by dlopen().
+
+const int kMsanParamTlsSizeInWords = 100;
+const int kMsanRetvalTlsSizeInWords = 100;
+
+namespace __msan {
+extern int msan_inited;
+extern bool msan_init_is_running;
+extern int msan_report_count;
+
+bool ProtectRange(uptr beg, uptr end);
+bool InitShadow(bool prot1, bool prot2, bool map_shadow, bool init_origins);
+char *GetProcSelfMaps();
+void InitializeInterceptors();
+
+void *MsanReallocate(StackTrace *stack, void *oldp, uptr size,
+                     uptr alignment, bool zeroise);
+void MsanDeallocate(void *ptr);
+void InstallTrapHandler();
+void InstallAtExitHandler();
+void ReplaceOperatorsNewAndDelete();
+
+void EnterSymbolizer();
+void ExitSymbolizer();
+bool IsInSymbolizer();
+
+struct SymbolizerScope {  // scope guard: Enter on construction, Exit on exit
+  SymbolizerScope() { EnterSymbolizer(); }
+  ~SymbolizerScope() { ExitSymbolizer(); }
+};
+
+void EnterLoader();
+void ExitLoader();
+
+void MsanDie();
+void PrintWarning(uptr pc, uptr bp);
+void PrintWarningWithOrigin(uptr pc, uptr bp, u32 origin);
+
+void GetStackTrace(StackTrace *stack, uptr max_s, uptr pc, uptr bp,
+                   bool fast);
+
+void ReportUMR(StackTrace *stack, u32 origin);
+void ReportExpectedUMRNotFound(StackTrace *stack);
+void ReportAtExitStatistics();
+
+void UnpoisonMappedDSO(struct link_map *map);
+
+#define GET_MALLOC_STACK_TRACE                                     \
+  StackTrace stack;                                                \
+  stack.size = 0;                                                  \
+  if (__msan_get_track_origins() && msan_inited)                   \
+    GetStackTrace(&stack, flags()->num_callers,                    \
+        StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(),           \
+        /* fast */ true)
+
+}  // namespace __msan
+
+#endif  // MSAN_H
diff --git a/lib/msan/msan_allocator.cc b/lib/msan/msan_allocator.cc
new file mode 100644
index 0000000..7435843
--- /dev/null
+++ b/lib/msan/msan_allocator.cc
@@ -0,0 +1,107 @@
+//===-- msan_allocator.cc -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer allocator.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "msan.h"
+
+namespace __msan {
+
+struct Metadata {
+  uptr requested_size;
+};
+
+static const uptr kAllocatorSpace = 0x600000000000ULL;
+static const uptr kAllocatorSize   = 0x80000000000;  // 8T.
+static const uptr kMetadataSize  = sizeof(Metadata);
+
+typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, kMetadataSize,
+                             DefaultSizeClassMap> PrimaryAllocator;
+typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
+typedef LargeMmapAllocator<> SecondaryAllocator;
+typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
+                          SecondaryAllocator> Allocator;
+
+static THREADLOCAL AllocatorCache cache;
+static Allocator allocator;
+
+static int inited = 0;
+
+static inline void Init() {
+  if (inited) return;  // already set up by an earlier call
+  __msan_init();
+  inited = true;  // this must happen before any threads are created.
+  allocator.Init();
+}
+
+static void *MsanAllocate(StackTrace *stack, uptr size,
+                          uptr alignment, bool zeroise) {
+  Init();
+  void *res = allocator.Allocate(&cache, size, alignment, false);
+  // A null result (MsanReallocate checks for one) has no metadata to set.
+  if (!res) return 0;
+  Metadata *meta = reinterpret_cast<Metadata*>(allocator.GetMetaData(res));
+  meta->requested_size = size;
+  if (zeroise) __msan_clear_and_unpoison(res, size);
+  else if (flags()->poison_in_malloc) __msan_poison(res, size);
+  if (__msan_get_track_origins()) {
+    u32 stack_id = StackDepotPut(stack->trace, stack->size);
+    CHECK(stack_id);
+    CHECK_EQ((stack_id >> 31), 0);  // Higher bit is occupied by stack origins.
+    __msan_set_origin(res, size, stack_id);
+  }
+  return res;
+}
+
+void MsanDeallocate(void *p) {
+  CHECK(p);  // callers must filter out null themselves
+  Init();
+  Metadata *meta = reinterpret_cast<Metadata*>(allocator.GetMetaData(p));
+  uptr size = meta->requested_size;  // size recorded when the chunk was handed out
+  // This memory will not be reused by anyone else, so we are free to keep it
+  // poisoned.
+  __msan_poison(p, size);
+  if (__msan_get_track_origins())
+    __msan_set_origin(p, size, -1);  // -1 converts to the all-ones u32 origin
+  allocator.Deallocate(&cache, p);
+}
+
+void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size,
+                     uptr alignment, bool zeroise) {
+  if (!old_p)  // nothing to resize: plain allocation
+    return MsanAllocate(stack, new_size, alignment, zeroise);
+  if (!new_size) {  // resizing to zero frees the chunk and yields null
+    MsanDeallocate(old_p);
+    return 0;
+  }
+  Metadata *meta = reinterpret_cast<Metadata*>(allocator.GetMetaData(old_p));
+  uptr old_size = meta->requested_size;
+  uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(old_p);
+  if (new_size <= actually_allocated_size) {
+    // We are not reallocating here.
+    meta->requested_size = new_size;
+    if (new_size > old_size)  // the newly exposed tail starts out poisoned
+      __msan_poison((char*)old_p + old_size, new_size - old_size);
+    return old_p;
+  }
+  uptr memcpy_size = Min(new_size, old_size);
+  void *new_p = MsanAllocate(stack, new_size, alignment, zeroise);
+  // The chunk is too small: move the contents to a new one, free the old.
+  if (new_p)
+    __msan_memcpy(new_p, old_p, memcpy_size);
+  MsanDeallocate(old_p);
+  return new_p;
+}
+
+}  // namespace __msan
diff --git a/lib/msan/msan_flags.h b/lib/msan/msan_flags.h
new file mode 100644
index 0000000..0c41c2e
--- /dev/null
+++ b/lib/msan/msan_flags.h
@@ -0,0 +1,35 @@
+//===-- msan_flags.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer runtime flags.
+//===----------------------------------------------------------------------===//
+#ifndef MSAN_FLAGS_H
+#define MSAN_FLAGS_H
+
+namespace __msan {
+
+// Flags.
+struct Flags {
+  int exit_code;
+  int num_callers;
+  int verbosity;
+  bool poison_heap_with_zeroes;  // default: false
+  bool poison_stack_with_zeroes;  // default: false
+  bool poison_in_malloc;  // default: true
+  bool report_umrs;
+  const char *strip_path_prefix;
+};
+
+Flags *flags();
+
+}  // namespace __msan
+
+#endif  // MSAN_FLAGS_H
diff --git a/lib/msan/msan_interceptors.cc b/lib/msan/msan_interceptors.cc
new file mode 100644
index 0000000..f81c8de
--- /dev/null
+++ b/lib/msan/msan_interceptors.cc
@@ -0,0 +1,999 @@
+//===-- msan_interceptors.cc ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// Interceptors for standard library functions.
+//
+// FIXME: move as many interceptors as possible into
+// sanitizer_common/sanitizer_common_interceptors.h
+//===----------------------------------------------------------------------===//
+
+#include "interception/interception.h"
+#include "msan.h"
+#include "sanitizer_common/sanitizer_platform_limits_posix.h"
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_libc.h"
+
+#include <stdarg.h>
+// ACHTUNG! No other system header includes in this file.
+// Ideally, we should get rid of stdarg.h as well.
+
+extern "C" const int __msan_keep_going;
+
+using namespace __msan;
+
+#define ENSURE_MSAN_INITED() do { \
+  CHECK(!msan_init_is_running); \
+  if (!msan_inited) { \
+    __msan_init(); \
+  } \
+} while (0)
+
+#define CHECK_UNPOISONED(x, n) \
+  do { /* both printed sizes are pointer-wide, hence %zd */ \
+    sptr offset = __msan_test_shadow(x, n);                 \
+    if (__msan::IsInSymbolizer()) break;                    \
+    if (offset >= 0 && flags()->report_umrs) {              \
+      GET_CALLER_PC_BP_SP;                                  \
+      (void)sp;                                             \
+      Printf("UMR in %s at offset %zd inside [%p, +%zd) \n", \
+             __FUNCTION__, offset, x, (sptr)(n));           \
+      __msan::PrintWarningWithOrigin(                       \
+        pc, bp, __msan_get_origin((char*)x + offset));      \
+      if (!__msan_keep_going) {                             \
+        Printf("Exiting\n");                                \
+        Die();                                              \
+      }                                                     \
+    }                                                       \
+  } while (0)
+
+static void *fast_memset(void *ptr, int c, SIZE_T n);
+static void *fast_memcpy(void *dst, const void *src, SIZE_T n);
+
+INTERCEPTOR(SIZE_T, fread, void *ptr, SIZE_T size, SIZE_T nmemb, void *file) {
+  ENSURE_MSAN_INITED();
+  // Unpoison exactly the items fread reports as read (items * size bytes).
+  const SIZE_T items = REAL(fread)(ptr, size, nmemb, file);
+  if (items > 0) __msan_unpoison(ptr, items * size);
+  return items;
+}
+
+INTERCEPTOR(SIZE_T, fread_unlocked, void *ptr, SIZE_T size, SIZE_T nmemb,
+            void *file) {
+  ENSURE_MSAN_INITED();
+  // Unpoison exactly the items reported as read (items * size bytes).
+  const SIZE_T items = REAL(fread_unlocked)(ptr, size, nmemb, file);
+  if (items > 0) __msan_unpoison(ptr, items * size);
+  return items;
+}
+
+INTERCEPTOR(SSIZE_T, readlink, const char *path, char *buf, SIZE_T bufsiz) {
+  ENSURE_MSAN_INITED();
+  // A positive result is used as the number of bytes of buf to unpoison.
+  const SSIZE_T written = REAL(readlink)(path, buf, bufsiz);
+  if (written > 0) __msan_unpoison(buf, written);
+  return written;
+}
+
+INTERCEPTOR(void *, readdir, void *a) {
+  ENSURE_MSAN_INITED();
+  void *res = REAL(readdir)(a);  // NULL at end of directory or on error
+  if (res) __msan_unpoison(res, __sanitizer::struct_dirent_sz);
+  return res;
+}
+
+INTERCEPTOR(void *, memcpy, void *dest, const void *src, SIZE_T n) {
+  return __msan_memcpy(dest, src, n);
+}
+
+INTERCEPTOR(void *, memmove, void *dest, const void *src, SIZE_T n) {
+  return __msan_memmove(dest, src, n);
+}
+
+INTERCEPTOR(void *, memset, void *s, int c, SIZE_T n) {
+  return __msan_memset(s, c, n);
+}
+
+INTERCEPTOR(int, posix_memalign, void **memptr, SIZE_T alignment, SIZE_T size) {
+  GET_MALLOC_STACK_TRACE;
+  CHECK_EQ(alignment & (alignment - 1), 0);
+  CHECK_NE(memptr, 0);  // validate before the store below dereferences it
+  *memptr = MsanReallocate(&stack, 0, size, alignment, false);
+  return 0;
+}
+
+INTERCEPTOR(void, free, void *ptr) {
+  ENSURE_MSAN_INITED();
+  if (ptr == 0) return;
+  MsanDeallocate(ptr);
+}
+
+INTERCEPTOR(SIZE_T, strlen, const char *s) {
+  ENSURE_MSAN_INITED();
+  SIZE_T res = REAL(strlen)(s);
+  CHECK_UNPOISONED(s, res + 1);
+  return res;
+}
+
+INTERCEPTOR(SIZE_T, strnlen, const char *s, SIZE_T n) {
+  ENSURE_MSAN_INITED();
+  SIZE_T res = REAL(strnlen)(s, n);
+  SIZE_T scan_size = (res == n) ? res : res + 1;
+  CHECK_UNPOISONED(s, scan_size);
+  return res;
+}
+
+// FIXME: Add stricter shadow checks in str* interceptors (ex.: strcpy should
+// check the shadow of the terminating \0 byte).
+
+INTERCEPTOR(char *, strcpy, char *dest, const char *src) {  // NOLINT
+  ENSURE_MSAN_INITED();
+  SIZE_T n = REAL(strlen)(src);
+  char *res = REAL(strcpy)(dest, src);  // NOLINT
+  __msan_copy_poison(dest, src, n + 1);
+  return res;
+}
+
+INTERCEPTOR(char *, strncpy, char *dest, const char *src, SIZE_T n) {  // NOLINT
+  ENSURE_MSAN_INITED();
+  SIZE_T copy_size = REAL(strnlen)(src, n);
+  if (copy_size < n) copy_size++;  // trailing \0
+  char *res = REAL(strncpy)(dest, src, n);  // NOLINT
+  __msan_copy_poison(dest, src, copy_size);
+  __msan_unpoison(dest + copy_size, n - copy_size);  // strncpy's zero padding
+  return res;
+}
+
+INTERCEPTOR(char *, strdup, char *src) {
+  ENSURE_MSAN_INITED();
+  SIZE_T n = REAL(strlen)(src);
+  char *res = REAL(strdup)(src);  // NULL when the copy cannot be allocated
+  if (res) __msan_copy_poison(res, src, n + 1);
+  return res;
+}
+
+INTERCEPTOR(char *, gcvt, double number, SIZE_T ndigit, char *buf) {
+  ENSURE_MSAN_INITED();
+  char *res = REAL(gcvt)(number, ndigit, buf);
+  // DynamoRio tool will take care of unpoisoning gcvt result for us.
+  if (!__msan_has_dynamic_component()) {
+    SIZE_T n = REAL(strlen)(buf);
+    __msan_unpoison(buf, n + 1);
+  }
+  return res;
+}
+
+INTERCEPTOR(char *, strcat, char *dest, const char *src) {  // NOLINT
+  ENSURE_MSAN_INITED();
+  SIZE_T src_size = REAL(strlen)(src);
+  SIZE_T dest_size = REAL(strlen)(dest);
+  char *res = REAL(strcat)(dest, src);  // NOLINT
+  __msan_copy_poison(dest + dest_size, src, src_size + 1);
+  return res;
+}
+
+INTERCEPTOR(char *, strncat, char *dest, const char *src, SIZE_T n) {  // NOLINT
+  ENSURE_MSAN_INITED();
+  SIZE_T dest_size = REAL(strlen)(dest);
+  // strncat appends at most n chars of src, then writes its own \0.
+  SIZE_T copy_size = REAL(strnlen)(src, n);
+  char *res = REAL(strncat)(dest, src, n);  // NOLINT
+  __msan_copy_poison(dest + dest_size, src, copy_size);
+  __msan_unpoison(dest + dest_size + copy_size, 1);  // the terminator
+  return res;
+}
+
+INTERCEPTOR(long, strtol, const char *nptr, char **endptr,  // NOLINT
+            int base) {
+  ENSURE_MSAN_INITED();
+  long res = REAL(strtol)(nptr, endptr, base);  // NOLINT
+  if (endptr && !__msan_has_dynamic_component()) {  // endptr may be NULL
+    __msan_unpoison(endptr, sizeof(*endptr));
+  }
+  return res;
+}
+
+INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr,  // NOLINT
+            int base) {
+  ENSURE_MSAN_INITED();
+  long long res = REAL(strtoll)(nptr, endptr, base);  // NOLINT
+  if (endptr && !__msan_has_dynamic_component()) {  // endptr may be NULL
+    __msan_unpoison(endptr, sizeof(*endptr));
+  }
+  return res;
+}
+
+INTERCEPTOR(unsigned long, strtoul, const char *nptr, char **endptr,  // NOLINT
+            int base) {
+  ENSURE_MSAN_INITED();
+  unsigned long res = REAL(strtoul)(nptr, endptr, base);  // NOLINT
+  if (endptr && !__msan_has_dynamic_component()) {  // endptr may be NULL
+    __msan_unpoison(endptr, sizeof(*endptr));
+  }
+  return res;
+}
+
+INTERCEPTOR(unsigned long long, strtoull, const char *nptr,  // NOLINT
+            char **endptr, int base) {
+  ENSURE_MSAN_INITED();
+  unsigned long long res = REAL(strtoull)(nptr, endptr, base);  // NOLINT
+  if (endptr && !__msan_has_dynamic_component()) {  // endptr may be NULL
+    __msan_unpoison(endptr, sizeof(*endptr));
+  }
+  return res;
+}
+
+INTERCEPTOR(double, strtod, const char *nptr, char **endptr) {  // NOLINT
+  ENSURE_MSAN_INITED();
+  double res = REAL(strtod)(nptr, endptr);  // NOLINT
+  if (endptr && !__msan_has_dynamic_component()) {  // endptr may be NULL
+    __msan_unpoison(endptr, sizeof(*endptr));
+  }
+  return res;
+}
+
+INTERCEPTOR(float, strtof, const char *nptr, char **endptr) {  // NOLINT
+  ENSURE_MSAN_INITED();
+  float res = REAL(strtof)(nptr, endptr);  // NOLINT
+  if (endptr && !__msan_has_dynamic_component()) {  // endptr may be NULL
+    __msan_unpoison(endptr, sizeof(*endptr));
+  }
+  return res;
+}
+
+INTERCEPTOR(long double, strtold, const char *nptr, char **endptr) {  // NOLINT
+  ENSURE_MSAN_INITED();
+  long double res = REAL(strtold)(nptr, endptr);  // NOLINT
+  if (endptr && !__msan_has_dynamic_component()) {  // endptr may be NULL
+    __msan_unpoison(endptr, sizeof(*endptr));
+  }
+  return res;
+}
+
+INTERCEPTOR(int, vsnprintf, char *str, uptr size,
+            const char *format, va_list ap) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(vsnprintf)(str, size, format, ap);
+  // res is the untruncated length; at most size bytes were actually written.
+  if (res >= 0 && size > 0 && !__msan_has_dynamic_component())
+    __msan_unpoison(str, (uptr)res < size ? (uptr)res + 1 : size);
+  return res;
+}
+
+INTERCEPTOR(int, vsprintf, char *str, const char *format, va_list ap) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(vsprintf)(str, format, ap);
+  if (res >= 0 && !__msan_has_dynamic_component()) {  // res < 0: output error
+    __msan_unpoison(str, res + 1);
+  }
+  return res;
+}
+
+INTERCEPTOR(int, vswprintf, void *str, uptr size, void *format, va_list ap) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(vswprintf)(str, size, format, ap);
+  if (res >= 0 && !__msan_has_dynamic_component()) {  // res < 0: failure
+    __msan_unpoison(str, sizeof(wchar_t) * (res + 1));
+  }
+  return res;
+}
+
+INTERCEPTOR(int, sprintf, char *str, const char *format, ...) {  // NOLINT
+  ENSURE_MSAN_INITED();
+  va_list ap;
+  va_start(ap, format);
+  int res = vsprintf(str, format, ap);  // NOLINT
+  va_end(ap);
+  return res;
+}
+
+INTERCEPTOR(int, snprintf, char *str, uptr size, const char *format, ...) {
+  ENSURE_MSAN_INITED();
+  va_list ap;
+  va_start(ap, format);
+  int res = vsnprintf(str, size, format, ap);
+  va_end(ap);
+  return res;
+}
+
+INTERCEPTOR(int, swprintf, void *str, uptr size, void *format, ...) {
+  ENSURE_MSAN_INITED();
+  va_list ap;
+  va_start(ap, format);
+  int res = vswprintf(str, size, format, ap);
+  va_end(ap);
+  return res;
+}
+
+// SIZE_T strftime(char *s, SIZE_T max, const char *format,const struct tm *tm);
+INTERCEPTOR(SIZE_T, strftime, char *s, SIZE_T max, const char *format,
+            void *tm) {
+  ENSURE_MSAN_INITED();
+  SIZE_T res = REAL(strftime)(s, max, format, tm);
+  if (res) __msan_unpoison(s, res + 1);
+  return res;
+}
+
+INTERCEPTOR(SIZE_T, wcstombs, void *dest, void *src, SIZE_T size) {
+  ENSURE_MSAN_INITED();
+  SIZE_T res = REAL(wcstombs)(dest, src, size);  // \0 stored only if res < size
+  if (dest && res != (SIZE_T)-1) __msan_unpoison(dest, res + (res < size));
+  return res;
+}
+
+INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T n) {
+  ENSURE_MSAN_INITED();
+  SIZE_T res = REAL(mbstowcs)(dest, src, n);  // L'\0' stored only if res < n
+  if (dest && res != (SIZE_T)-1)  // dest may be NULL (length query only)
+    __msan_unpoison(dest, (res + (res < n)) * sizeof(wchar_t));
+  return res;
+}
+
+INTERCEPTOR(SIZE_T, wcslen, const wchar_t *s) {
+  ENSURE_MSAN_INITED();
+  SIZE_T res = REAL(wcslen)(s);
+  CHECK_UNPOISONED(s, sizeof(wchar_t) * (res + 1));
+  return res;
+}
+
+// wchar_t *wcschr(const wchar_t *wcs, wchar_t wc);
+INTERCEPTOR(wchar_t *, wcschr, void *s, wchar_t wc, void *ps) {
+  ENSURE_MSAN_INITED();  // NOTE(review): wcschr(3) has 2 params; confirm 'ps'
+  wchar_t *res = REAL(wcschr)(s, wc, ps);  // no shadow is read or written here
+  return res;
+}
+
+// wchar_t *wcscpy(wchar_t *dest, const wchar_t *src);
+INTERCEPTOR(wchar_t *, wcscpy, wchar_t *dest, const wchar_t *src) {
+  ENSURE_MSAN_INITED();
+  wchar_t *res = REAL(wcscpy)(dest, src);
+  __msan_copy_poison(dest, src, sizeof(wchar_t) * (REAL(wcslen)(src) + 1));
+  return res;
+}
+
+// wchar_t *wmemcpy(wchar_t *dest, const wchar_t *src, SIZE_T n);
+INTERCEPTOR(wchar_t *, wmemcpy, wchar_t *dest, const wchar_t *src, SIZE_T n) {
+  ENSURE_MSAN_INITED();
+  wchar_t *res = REAL(wmemcpy)(dest, src, n);
+  __msan_copy_poison(dest, src, n * sizeof(wchar_t));
+  return res;
+}
+
+INTERCEPTOR(wchar_t *, wmemset, wchar_t *s, wchar_t c, SIZE_T n) {
+  CHECK(MEM_IS_APP(s));
+  ENSURE_MSAN_INITED();
+  wchar_t *res = REAL(wmemset)(s, c, n);  // stores c as whole wide characters
+  __msan_unpoison(s, n * sizeof(wchar_t));
+  return res;
+}
+
+INTERCEPTOR(wchar_t *, wmemmove, wchar_t *dest, const wchar_t *src, SIZE_T n) {
+  ENSURE_MSAN_INITED();
+  wchar_t *res = REAL(wmemmove)(dest, src, n);
+  __msan_move_poison(dest, src, n * sizeof(wchar_t));
+  return res;
+}
+
+INTERCEPTOR(int, wcscmp, const wchar_t *s1, const wchar_t *s2) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(wcscmp)(s1, s2);
+  return res;
+}
+
+INTERCEPTOR(double, wcstod, const wchar_t *nptr, wchar_t **endptr) {
+  ENSURE_MSAN_INITED();
+  double res = REAL(wcstod)(nptr, endptr);
+  if (endptr) __msan_unpoison(endptr, sizeof(*endptr));  // NULL is allowed
+  return res;
+}
+
+// #define UNSUPPORTED(name) \
+//   INTERCEPTOR(void, name, void) {                     \
+//     Printf("MSAN: Unsupported %s\n", __FUNCTION__);   \
+//     Die();                                            \
+//   }
+
+// FIXME: intercept the following functions:
+// Note, they only matter when running without a dynamic tool.
+// UNSUPPORTED(wcscoll_l)
+// UNSUPPORTED(wcsnrtombs)
+// UNSUPPORTED(wcstol)
+// UNSUPPORTED(wcstoll)
+// UNSUPPORTED(wcstold)
+// UNSUPPORTED(wcstoul)
+// UNSUPPORTED(wcstoull)
+// UNSUPPORTED(wcsxfrm_l)
+// UNSUPPORTED(wcsdup)
+// UNSUPPORTED(wcsftime)
+// UNSUPPORTED(wcsstr)
+// UNSUPPORTED(wcsrchr)
+// UNSUPPORTED(wctob)
+
+INTERCEPTOR(int, gettimeofday, void *tv, void *tz) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(gettimeofday)(tv, tz);
+  if (!res && tv)  // only a successful call fills the buffers in
+    __msan_unpoison(tv, 16);
+  if (!res && tz)
+    __msan_unpoison(tz, 8);
+  return res;
+}
+
+INTERCEPTOR(char *, fcvt, double x, int a, int *b, int *c) {
+  ENSURE_MSAN_INITED();
+  char *res = REAL(fcvt)(x, a, b, c);
+  if (!__msan_has_dynamic_component()) {
+    __msan_unpoison(b, sizeof(*b));
+    __msan_unpoison(c, sizeof(*c));
+  }
+  return res;
+}
+
+INTERCEPTOR(char *, getenv, char *name) {
+  ENSURE_MSAN_INITED();
+  char *res = REAL(getenv)(name);
+  if (!__msan_has_dynamic_component()) {
+    if (res)
+      __msan_unpoison(res, REAL(strlen)(res) + 1);
+  }
+  return res;
+}
+
+INTERCEPTOR(int, __fxstat, int magic, int fd, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(__fxstat)(magic, fd, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_stat_sz);
+  return res;
+}
+
+INTERCEPTOR(int, __fxstat64, int magic, int fd, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(__fxstat64)(magic, fd, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_stat64_sz);
+  return res;
+}
+
+INTERCEPTOR(int, __xstat, int magic, char *path, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(__xstat)(magic, path, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_stat_sz);
+  return res;
+}
+
+INTERCEPTOR(int, __xstat64, int magic, char *path, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(__xstat64)(magic, path, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_stat64_sz);
+  return res;
+}
+
+INTERCEPTOR(int, __lxstat, int magic, char *path, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(__lxstat)(magic, path, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_stat_sz);
+  return res;
+}
+
+INTERCEPTOR(int, __lxstat64, int magic, char *path, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(__lxstat64)(magic, path, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_stat64_sz);
+  return res;
+}
+
+INTERCEPTOR(int, pipe, int pipefd[2]) {
+  if (msan_init_is_running)
+    return REAL(pipe)(pipefd);
+  ENSURE_MSAN_INITED();
+  int res = REAL(pipe)(pipefd);
+  if (!res)
+    __msan_unpoison(pipefd, sizeof(int[2]));
+  return res;
+}
+
+INTERCEPTOR(int, wait, int *status) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(wait)(status);
+  if (status)
+    __msan_unpoison(status, sizeof(*status));
+  return res;
+}
+
+INTERCEPTOR(int, waitpid, int pid, int *status, int options) {
+  if (msan_init_is_running)
+    return REAL(waitpid)(pid, status, options);
+  ENSURE_MSAN_INITED();
+  int res = REAL(waitpid)(pid, status, options);
+  if (status)
+    __msan_unpoison(status, sizeof(*status));
+  return res;
+}
+
+INTERCEPTOR(char *, fgets, char *s, int size, void *stream) {
+  ENSURE_MSAN_INITED();
+  char *res = REAL(fgets)(s, size, stream);
+  if (res)
+    __msan_unpoison(s, REAL(strlen)(s) + 1);
+  return res;
+}
+
+INTERCEPTOR(char *, fgets_unlocked, char *s, int size, void *stream) {
+  ENSURE_MSAN_INITED();
+  char *res = REAL(fgets_unlocked)(s, size, stream);
+  if (res)
+    __msan_unpoison(s, REAL(strlen)(s) + 1);
+  return res;
+}
+
+INTERCEPTOR(char *, getcwd, char *buf, SIZE_T size) {
+  ENSURE_MSAN_INITED();
+  char *res = REAL(getcwd)(buf, size);
+  if (res)
+    __msan_unpoison(res, REAL(strlen)(res) + 1);
+  return res;
+}
+
+INTERCEPTOR(char *, realpath, char *path, char *abspath) {
+  ENSURE_MSAN_INITED();
+  char *res = REAL(realpath)(path, abspath);
+  if (res)  // res, not abspath: abspath may be NULL (result is then malloc'ed)
+    __msan_unpoison(res, REAL(strlen)(res) + 1);
+  return res;
+}
+
+INTERCEPTOR(int, getrlimit, int resource, void *rlim) {
+  if (msan_init_is_running)
+    return REAL(getrlimit)(resource, rlim);
+  ENSURE_MSAN_INITED();
+  int res = REAL(getrlimit)(resource, rlim);
+  if (!res)
+    __msan_unpoison(rlim, __sanitizer::struct_rlimit_sz);
+  return res;
+}
+
+INTERCEPTOR(int, getrlimit64, int resource, void *rlim) {
+  if (msan_init_is_running)
+    return REAL(getrlimit64)(resource, rlim);
+  ENSURE_MSAN_INITED();
+  int res = REAL(getrlimit64)(resource, rlim);
+  if (!res)
+    __msan_unpoison(rlim, __sanitizer::struct_rlimit64_sz);
+  return res;
+}
+
+INTERCEPTOR(int, statfs, const char *s, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(statfs)(s, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_statfs_sz);
+  return res;
+}
+
+INTERCEPTOR(int, fstatfs, int fd, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(fstatfs)(fd, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_statfs_sz);
+  return res;
+}
+
+INTERCEPTOR(int, statfs64, const char *s, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(statfs64)(s, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_statfs64_sz);
+  return res;
+}
+
+INTERCEPTOR(int, fstatfs64, int fd, void *buf) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(fstatfs64)(fd, buf);
+  if (!res)
+    __msan_unpoison(buf, __sanitizer::struct_statfs64_sz);
+  return res;
+}
+
+INTERCEPTOR(int, uname, void *utsname) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(uname)(utsname);
+  if (!res) {
+    __msan_unpoison(utsname, __sanitizer::struct_utsname_sz);
+  }
+  return res;
+}
+
+INTERCEPTOR(int, gethostname, char *name, SIZE_T len) {
+  ENSURE_MSAN_INITED();
+  const int status = REAL(gethostname)(name, len);
+  if (status != 0)
+    return status;
+  // Unpoison the name plus its terminator, when one fits within len.
+  const SIZE_T chars = REAL(strnlen)(name, len);
+  const SIZE_T written = chars < len ? chars + 1 : chars;
+  __msan_unpoison(name, written);
+  return status;
+}
+
+INTERCEPTOR(int, epoll_wait, int epfd, void *events, int maxevents,
+    int timeout) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(epoll_wait)(epfd, events, maxevents, timeout);
+  if (res > 0) {
+    __msan_unpoison(events, __sanitizer::struct_epoll_event_sz * res);
+  }
+  return res;
+}
+
+INTERCEPTOR(int, epoll_pwait, int epfd, void *events, int maxevents,
+    int timeout, void *sigmask) {
+  ENSURE_MSAN_INITED();
+  int res = REAL(epoll_pwait)(epfd, events, maxevents, timeout, sigmask);
+  if (res > 0) {
+    __msan_unpoison(events, __sanitizer::struct_epoll_event_sz * res);
+  }
+  return res;
+}
+
+INTERCEPTOR(SSIZE_T, recv, int fd, void *buf, SIZE_T len, int flags) {
+  ENSURE_MSAN_INITED();
+  const SSIZE_T received = REAL(recv)(fd, buf, len, flags);
+  if (received > 0)  // Only the bytes reported as received become defined.
+    __msan_unpoison(buf, received);
+  return received;
+}
+
+INTERCEPTOR(SSIZE_T, recvfrom, int fd, void *buf, SIZE_T len, int flags,
+    void *srcaddr, void *addrlen) {
+  ENSURE_MSAN_INITED();
+  SIZE_T srcaddr_sz = 0;  // Capacity of *srcaddr as passed in by the caller.
+  if (srcaddr && addrlen)  // Never read the length through a null pointer.
+    srcaddr_sz = __sanitizer_get_socklen_t(addrlen);
+  SSIZE_T res = REAL(recvfrom)(fd, buf, len, flags, srcaddr, addrlen);
+  if (res >= 0) {  // A zero-length datagram still reports its sender.
+    __msan_unpoison(buf, res);
+    if (srcaddr && addrlen) {
+      SIZE_T sz = __sanitizer_get_socklen_t(addrlen);  // Size after the call.
+      __msan_unpoison(srcaddr, (sz < srcaddr_sz) ? sz : srcaddr_sz);
+    }
+  }
+  return res;
+}
+
+INTERCEPTOR(SSIZE_T, recvmsg, int fd, struct msghdr *msg, int flags) {
+  ENSURE_MSAN_INITED();
+  const SSIZE_T received = REAL(recvmsg)(fd, msg, flags);
+  if (received <= 0) return received;
+  // NOTE(review): every iovec is unpoisoned in full, not just `received` bytes.
+  for (SIZE_T idx = 0; idx < __sanitizer_get_msghdr_iovlen(msg); ++idx)
+    __msan_unpoison(__sanitizer_get_msghdr_iov_iov_base(msg, idx),
+        __sanitizer_get_msghdr_iov_iov_len(msg, idx));
+  return received;
+}
+
+INTERCEPTOR(void *, calloc, SIZE_T nmemb, SIZE_T size) {
+  if (CallocShouldReturnNullDueToOverflow(size, nmemb)) return 0;
+  GET_MALLOC_STACK_TRACE;
+  if (!msan_inited) {
+    // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
+    const SIZE_T kCallocPoolSize = 1024;  // Capacity counted in uptr elements.
+    static uptr calloc_memory_for_dlsym[kCallocPoolSize];
+    static SIZE_T allocated;  // Elements handed out so far; only ever grows.
+    SIZE_T size_in_words = ((nmemb * size) + kWordSize - 1) / kWordSize;
+    void *mem = (void*)&calloc_memory_for_dlsym[allocated];
+    allocated += size_in_words;
+    CHECK(allocated < kCallocPoolSize);  // Fails once the pool is used up.
+    return mem;
+  }
+  return MsanReallocate(&stack, 0, nmemb * size, sizeof(u64), true);
+}
+
+INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) {
+  GET_MALLOC_STACK_TRACE;  // Presumably what declares `stack` used below.
+  return MsanReallocate(&stack, ptr, size, sizeof(u64), false);
+}
+
+INTERCEPTOR(void *, malloc, SIZE_T size) {
+  GET_MALLOC_STACK_TRACE;  // Presumably what declares `stack` used below.
+  return MsanReallocate(&stack, 0, size, sizeof(u64), false);  // No old block.
+}
+
+void __msan_allocated_memory(void* data, uptr size) {
+  GET_MALLOC_STACK_TRACE;
+  if (flags()->poison_in_malloc)
+    __msan_poison(data, size);
+  if (!__msan_get_track_origins()) return;
+  // Store the current stack in the depot and use its id as the origin.
+  const u32 origin_id = StackDepotPut(stack.trace, stack.size);
+  CHECK(origin_id);
+  CHECK_EQ((origin_id >> 31), 0);  // Higher bit is occupied by stack origins.
+  __msan_set_origin(data, size, origin_id);
+}
+
+INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags,
+            int fd, OFF_T offset) {
+  ENSURE_MSAN_INITED();
+  void *res = REAL(mmap)(addr, length, prot, flags, fd, offset);
+  if (res != (void*)-1)  // (void*)-1 is the failure value; skip it.
+    __msan_unpoison(res, RoundUpTo(length, GetPageSizeCached()));
+  return res;
+}
+
+INTERCEPTOR(void *, mmap64, void *addr, SIZE_T length, int prot, int flags,
+            int fd, OFF64_T offset) {
+  ENSURE_MSAN_INITED();
+  void *res = REAL(mmap64)(addr, length, prot, flags, fd, offset);
+  if (res != (void*)-1)  // (void*)-1 is the failure value; skip it.
+    __msan_unpoison(res, RoundUpTo(length, GetPageSizeCached()));
+  return res;
+}
+
+struct dlinfo {  // Local stand-in for the struct that dladdr() fills in.
+  char *dli_fname;  // Unpoisoned as a C string by the dladdr interceptor.
+  void *dli_fbase;
+  char *dli_sname;  // Unpoisoned as a C string by the dladdr interceptor.
+  void *dli_saddr;
+};
+
+INTERCEPTOR(int, dladdr, void *addr, dlinfo *info) {
+  ENSURE_MSAN_INITED();
+  const int found = REAL(dladdr)(addr, info);
+  if (found == 0) return found;
+  // Non-zero result: unpoison the struct and each non-null string in it.
+  __msan_unpoison(info, sizeof(*info));
+  if (info->dli_fname)
+    __msan_unpoison(info->dli_fname, REAL(strlen)(info->dli_fname) + 1);
+  if (info->dli_sname)
+    __msan_unpoison(info->dli_sname, REAL(strlen)(info->dli_sname) + 1);
+  return found;
+}
+
+// dlopen() ultimately calls mmap() down inside the loader, which generally
+// doesn't participate in dynamic symbol resolution.  Therefore we won't
+// intercept its calls to mmap, and we have to hook it here.  The loader
+// initializes the module before returning, so without the dynamic component, we
+// won't be able to clear the shadow before the initializers.  Fixing this would
+// require putting our own initializer first to clear the shadow.
+INTERCEPTOR(void *, dlopen, const char *filename, int flag) {
+  ENSURE_MSAN_INITED();
+  EnterLoader();
+  link_map *map = (link_map *)REAL(dlopen)(filename, flag);
+  ExitLoader();
+  if (map && !__msan_has_dynamic_component()) {
+    // If msandr didn't clear the shadow before the initializers ran, we do it
+    // ourselves afterwards.  A failed dlopen() returns null: nothing to do.
+    UnpoisonMappedDSO(map);
+  }
+  return (void *)map;
+}
+
+INTERCEPTOR(int, getrusage, int who, void *usage) {
+  ENSURE_MSAN_INITED();
+  const int status = REAL(getrusage)(who, usage);
+  // On a zero result the rusage-sized buffer is marked initialized.
+  if (status == 0)
+    __msan_unpoison(usage, __sanitizer::struct_rusage_sz);
+  return status;
+}
+
+#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
+    __msan_unpoison(ptr, size)  // Written ranges become initialized.
+#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) do { } while (false)
+#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
+  do {                                           \
+    ctx = 0;                                     \
+    (void)ctx;                                   \
+    ENSURE_MSAN_INITED();                        \
+  } while (false)  // ctx is only zeroed here; func is not used.
+#define COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd) do { } while (false)
+#define COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd) do { } while (false)
+#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
+  do { } while (false)  // FIXME
+#include "sanitizer_common/sanitizer_common_interceptors.inc"
+
+// static
+void *fast_memset(void *ptr, int c, SIZE_T n) {
+  // Stop-gap until internal_memset is fast: with an 8-byte uptr, 8-byte
+  // aligned ptr and n, and a fill of 0 or -1, store whole words instead.
+  const bool word_fill = sizeof(uptr) == 8 && (n % 8) == 0 &&
+                         ((uptr)ptr % 8) == 0 && (c == 0 || c == -1);
+  if (!word_fill)
+    return internal_memset(ptr, c, n);
+  // c is 0 or -1 here, so the word is all-zero or all-one bits.
+  const uptr pattern = c ? -1L : 0L;
+  uptr *word = (uptr*)ptr;
+  uptr *const stop = word + n / 8;
+  while (word != stop)
+    *word++ = pattern;
+  return ptr;
+}
+
+// static
+void *fast_memcpy(void *dst, const void *src, SIZE_T n) {
+  // Same stop-gap as fast_memset: with an 8-byte uptr and n, dst and src
+  // all multiples of 8, copy word by word from the lowest address up;
+  // anything else is handed to internal_memcpy.
+  const bool word_copy = sizeof(uptr) == 8 && (n % 8) == 0 &&
+                         ((uptr)dst % 8) == 0 && ((uptr)src % 8) == 0;
+  if (!word_copy)
+    return internal_memcpy(dst, src, n);
+  uptr *out = (uptr*)dst;
+  uptr *in = (uptr*)src;
+  for (SIZE_T left = n / 8; left != 0; --left)
+    *out++ = *in++;
+  return dst;
+}
+
+// These interface functions reside here so that they can use
+// fast_memset, etc.
+void __msan_unpoison(void *a, uptr size) {
+  if (MEM_IS_APP(a))  // Non-app addresses are ignored.
+    fast_memset((void*)MEM_TO_SHADOW((uptr)a), 0, size);
+}
+
+void __msan_poison(void *a, uptr size) {
+  if (MEM_IS_APP(a))  // Non-app addresses are ignored.
+    fast_memset((void*)MEM_TO_SHADOW((uptr)a),
+                __msan::flags()->poison_heap_with_zeroes ? 0 : -1, size);
+}
+
+void __msan_poison_stack(void *a, uptr size) {
+  if (MEM_IS_APP(a))  // Non-app addresses are ignored.
+    fast_memset((void*)MEM_TO_SHADOW((uptr)a),
+                __msan::flags()->poison_stack_with_zeroes ? 0 : -1, size);
+}
+
+void __msan_clear_and_unpoison(void *a, uptr size) {
+  fast_memset(a, 0, size);  // Zero the bytes at a themselves.
+  __msan_unpoison(a, size);  // Shadow update; skipped for non-app addresses.
+}
+
+void __msan_copy_origin(void *dst, const void *src, uptr size) {
+  if (!__msan_get_track_origins()) return;  // No-op without origin tracking.
+  if (!MEM_IS_APP(dst) || !MEM_IS_APP(src)) return;
+  uptr d = MEM_TO_ORIGIN(dst);  // Origin-side address for dst.
+  uptr s = MEM_TO_ORIGIN(src);  // Origin-side address for src.
+  uptr beg = d & ~3UL;  // align down to a multiple of 4.
+  uptr end = (d + size + 3) & ~3UL;  // align up to a multiple of 4.
+  s = s & ~3UL;  // align down; done independently of d's alignment.
+  fast_memcpy((void*)beg, (void*)s, end - beg);  // Copies into [beg, end).
+}
+
+void __msan_copy_poison(void *dst, const void *src, uptr size) {
+  // Shadow and origins are copied only when both pointers are app addresses.
+  if (!MEM_IS_APP(dst) || !MEM_IS_APP(src)) return;
+  fast_memcpy((void*)MEM_TO_SHADOW((uptr)dst),
+              (void*)MEM_TO_SHADOW((uptr)src), size);
+  __msan_copy_origin(dst, src, size);
+}
+
+void __msan_move_poison(void *dst, const void *src, uptr size) {
+  // NOTE(review): shadow uses memmove, origins the memcpy-based copy_origin.
+  if (!MEM_IS_APP(dst) || !MEM_IS_APP(src)) return;
+  internal_memmove((void*)MEM_TO_SHADOW((uptr)dst),
+         (void*)MEM_TO_SHADOW((uptr)src), size);
+  __msan_copy_origin(dst, src, size);
+}
+
+void *__msan_memcpy(void *dest, const void *src, SIZE_T n) {
+  ENSURE_MSAN_INITED();
+  void *const ret = fast_memcpy(dest, src, n);  // Copy the bytes first...
+  __msan_copy_poison(dest, src, n);  // ...then their shadow and origins.
+  return ret;
+}
+
+void *__msan_memset(void *s, int c, SIZE_T n) {
+  ENSURE_MSAN_INITED();
+  void *const ret = fast_memset(s, c, n);  // Fill the bytes first...
+  __msan_unpoison(s, n);  // ...then mark the filled range initialized.
+  return ret;
+}
+
+void *__msan_memmove(void *dest, const void *src, SIZE_T n) {
+  ENSURE_MSAN_INITED();
+  void *const ret = REAL(memmove)(dest, src, n);  // Move the bytes first...
+  __msan_move_poison(dest, src, n);  // ...then their shadow and origins.
+  return ret;
+}
+
+namespace __msan {
+void InitializeInterceptors() {  // Registers every interceptor listed below.
+  static int inited = 0;  // Set at the end; a second call trips the CHECK.
+  CHECK_EQ(inited, 0);
+  SANITIZER_COMMON_INTERCEPTORS_INIT;
+
+  INTERCEPT_FUNCTION(mmap);
+  INTERCEPT_FUNCTION(mmap64);
+  INTERCEPT_FUNCTION(posix_memalign);
+  INTERCEPT_FUNCTION(malloc);
+  INTERCEPT_FUNCTION(calloc);
+  INTERCEPT_FUNCTION(realloc);
+  INTERCEPT_FUNCTION(free);
+  INTERCEPT_FUNCTION(fread);
+  INTERCEPT_FUNCTION(fread_unlocked);
+  INTERCEPT_FUNCTION(readlink);
+  INTERCEPT_FUNCTION(readdir);
+  INTERCEPT_FUNCTION(memcpy);
+  INTERCEPT_FUNCTION(memset);
+  INTERCEPT_FUNCTION(memmove);
+  INTERCEPT_FUNCTION(wmemset);
+  INTERCEPT_FUNCTION(wmemcpy);
+  INTERCEPT_FUNCTION(wmemmove);
+  INTERCEPT_FUNCTION(strcpy);  // NOLINT
+  INTERCEPT_FUNCTION(strdup);
+  INTERCEPT_FUNCTION(strncpy);  // NOLINT
+  INTERCEPT_FUNCTION(strlen);
+  INTERCEPT_FUNCTION(strnlen);
+  INTERCEPT_FUNCTION(gcvt);
+  INTERCEPT_FUNCTION(strcat);  // NOLINT
+  INTERCEPT_FUNCTION(strncat);  // NOLINT
+  INTERCEPT_FUNCTION(strtol);
+  INTERCEPT_FUNCTION(strtoll);
+  INTERCEPT_FUNCTION(strtoul);
+  INTERCEPT_FUNCTION(strtoull);
+  INTERCEPT_FUNCTION(strtod);
+  INTERCEPT_FUNCTION(strtof);
+  INTERCEPT_FUNCTION(strtold);
+  INTERCEPT_FUNCTION(vsprintf);
+  INTERCEPT_FUNCTION(vsnprintf);
+  INTERCEPT_FUNCTION(vswprintf);
+  INTERCEPT_FUNCTION(sprintf);  // NOLINT
+  INTERCEPT_FUNCTION(snprintf);
+  INTERCEPT_FUNCTION(swprintf);
+  INTERCEPT_FUNCTION(strftime);
+  INTERCEPT_FUNCTION(wcstombs);
+  INTERCEPT_FUNCTION(mbstowcs);
+  INTERCEPT_FUNCTION(wcslen);
+  INTERCEPT_FUNCTION(wcschr);
+  INTERCEPT_FUNCTION(wcscpy);
+  INTERCEPT_FUNCTION(wcscmp);
+  INTERCEPT_FUNCTION(wcstod);
+  INTERCEPT_FUNCTION(getenv);
+  INTERCEPT_FUNCTION(gettimeofday);
+  INTERCEPT_FUNCTION(fcvt);
+  INTERCEPT_FUNCTION(__fxstat);
+  INTERCEPT_FUNCTION(__xstat);
+  INTERCEPT_FUNCTION(__lxstat);
+  INTERCEPT_FUNCTION(__fxstat64);
+  INTERCEPT_FUNCTION(__xstat64);
+  INTERCEPT_FUNCTION(__lxstat64);
+  INTERCEPT_FUNCTION(pipe);
+  INTERCEPT_FUNCTION(wait);
+  INTERCEPT_FUNCTION(waitpid);
+  INTERCEPT_FUNCTION(fgets);
+  INTERCEPT_FUNCTION(fgets_unlocked);
+  INTERCEPT_FUNCTION(getcwd);
+  INTERCEPT_FUNCTION(realpath);
+  INTERCEPT_FUNCTION(getrlimit);
+  INTERCEPT_FUNCTION(getrlimit64);
+  INTERCEPT_FUNCTION(statfs);
+  INTERCEPT_FUNCTION(fstatfs);
+  INTERCEPT_FUNCTION(statfs64);
+  INTERCEPT_FUNCTION(fstatfs64);
+  INTERCEPT_FUNCTION(uname);
+  INTERCEPT_FUNCTION(gethostname);
+  INTERCEPT_FUNCTION(epoll_wait);
+  INTERCEPT_FUNCTION(epoll_pwait);
+  INTERCEPT_FUNCTION(recv);
+  INTERCEPT_FUNCTION(recvfrom);
+  INTERCEPT_FUNCTION(recvmsg);
+  INTERCEPT_FUNCTION(dladdr);
+  INTERCEPT_FUNCTION(dlopen);
+  INTERCEPT_FUNCTION(getrusage);
+  inited = 1;  // Registration finished.
+}
+}  // namespace __msan
diff --git a/lib/msan/msan_interface_internal.h b/lib/msan/msan_interface_internal.h
new file mode 100644
index 0000000..e1cd13c
--- /dev/null
+++ b/lib/msan/msan_interface_internal.h
@@ -0,0 +1,125 @@
+//===-- msan_interface_internal.h -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// Private MSan interface header.
+//===----------------------------------------------------------------------===//
+
+#ifndef MSAN_INTERFACE_INTERNAL_H
+#define MSAN_INTERFACE_INTERNAL_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+extern "C" {
+// FIXME: document all interface functions.
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __msan_get_track_origins();
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_init();
+
+// Print a warning and maybe return.
+// This function can die based on flags()->exit_code.
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_warning();
+
+// Print a warning and die.
+// Instrumentation inserts calls to this function when building in "fast" mode
+// (i.e. -mllvm -msan-keep-going)
+SANITIZER_INTERFACE_ATTRIBUTE __attribute__((noreturn))
+void __msan_warning_noreturn();
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_unpoison(void *a, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_clear_and_unpoison(void *a, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void* __msan_memcpy(void *dst, const void *src, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void* __msan_memset(void *s, int c, uptr n);
+SANITIZER_INTERFACE_ATTRIBUTE
+void* __msan_memmove(void* dest, const void* src, uptr n);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_copy_poison(void *dst, const void *src, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_copy_origin(void *dst, const void *src, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_move_poison(void *dst, const void *src, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_poison(void *a, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_poison_stack(void *a, uptr size);
+
+// Copy size bytes from src to dst and unpoison the result.
+// Useful to implement unsafe loads.
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_load_unpoisoned(void *src, uptr size, void *dst);
+
+// Returns the offset of the first (at least partially) poisoned byte,
+// or -1 if the whole range is good.
+SANITIZER_INTERFACE_ATTRIBUTE
+sptr __msan_test_shadow(const void *x, uptr size);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_set_origin(void *a, uptr size, u32 origin);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_set_alloca_origin(void *a, uptr size, const char *descr);
+SANITIZER_INTERFACE_ATTRIBUTE
+u32 __msan_get_origin(void *a);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_clear_on_return();
+
+// Default: -1 (don't exit on error).
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_set_exit_code(int exit_code);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __msan_set_poison_in_malloc(int do_poison);
+
+SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
+/* OPTIONAL */ const char* __msan_default_options();
+
+// For testing.
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_set_expect_umr(int expect_umr);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_print_shadow(const void *x, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_print_param_shadow();
+SANITIZER_INTERFACE_ATTRIBUTE
+int  __msan_has_dynamic_component();
+
+// Returns x such that %fs:x is the first byte of __msan_retval_tls.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __msan_get_retval_tls_offset();
+SANITIZER_INTERFACE_ATTRIBUTE
+int __msan_get_param_tls_offset();
+
+// For intercepting mmap from ld.so in msandr.
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader();
+
+// For testing.
+SANITIZER_INTERFACE_ATTRIBUTE
+u32 __msan_get_umr_origin();
+SANITIZER_INTERFACE_ATTRIBUTE
+const char *__msan_get_origin_descr_if_stack(u32 id);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_partial_poison(void* data, void* shadow, uptr size);
+
+// Tell MSan about newly allocated memory (ex.: custom allocator).
+// Memory will be marked uninitialized, with origin at the call site.
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_allocated_memory(void* data, uptr size);
+}  // extern "C"
+
+#endif  // MSAN_INTERFACE_INTERNAL_H
diff --git a/lib/msan/msan_linux.cc b/lib/msan/msan_linux.cc
new file mode 100644
index 0000000..cda23b1
--- /dev/null
+++ b/lib/msan/msan_linux.cc
@@ -0,0 +1,131 @@
+//===-- msan_linux.cc -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// Linux-specific code.
+//===----------------------------------------------------------------------===//
+
+#ifdef __linux__
+
+#include "msan.h"
+
+#include <algorithm>
+#include <elf.h>
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <unwind.h>
+#include <execinfo.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+
+namespace __msan {
+
+static const uptr kMemBeg     = 0x600000000000;  // "Memory" range, first byte.
+static const uptr kMemEnd     = 0x7fffffffffff;  // "Memory" range, last byte.
+static const uptr kShadowBeg  = MEM_TO_SHADOW(kMemBeg);
+static const uptr kShadowEnd  = MEM_TO_SHADOW(kMemEnd);
+static const uptr kBad1Beg    = 0x100000000;  // 4G
+static const uptr kBad1End    = kShadowBeg - 1;  // Ends just below the shadow.
+static const uptr kBad2Beg    = kShadowEnd + 1;  // Starts just above shadow.
+static const uptr kBad2End    = kMemBeg - 1;  // Ends just below kMemBeg.
+static const uptr kOriginsBeg = kBad2Beg;  // Origins share the Bad2 range.
+static const uptr kOriginsEnd = kBad2End;
+
+bool InitShadow(bool prot1, bool prot2, bool map_shadow, bool init_origins) {
+  if (flags()->verbosity) {
+    Printf("__msan_init %p\n", &__msan_init);
+    Printf("Memory   : %p %p\n", kMemBeg, kMemEnd);
+    Printf("Bad2     : %p %p\n", kBad2Beg, kBad2End);
+    Printf("Origins  : %p %p\n", kOriginsBeg, kOriginsEnd);
+    Printf("Shadow   : %p %p\n", kShadowBeg, kShadowEnd);
+    Printf("Bad1     : %p %p\n", kBad1Beg, kBad1End);
+  }
+
+  // The availability check extends through the origins only if requested.
+  const uptr needed_end = init_origins ? kOriginsEnd : kShadowEnd;
+  if (!MemoryRangeIsAvailable(kShadowBeg, needed_end)) {
+    Printf("FATAL: Shadow memory range is not available.\n");
+    return false;
+  }
+
+  if (prot1 && !Mprotect(kBad1Beg, kBad1End - kBad1Beg)) return false;
+  if (prot2 && !Mprotect(kBad2Beg, kBad2End - kBad2Beg)) return false;
+  // Each requested mapping must land exactly at its fixed start address.
+  if (map_shadow) {
+    void *got = MmapFixedNoReserve(kShadowBeg, kShadowEnd - kShadowBeg);
+    if (got != (void*)kShadowBeg) return false;
+  }
+  if (init_origins) {
+    void *got = MmapFixedNoReserve(kOriginsBeg, kOriginsEnd - kOriginsBeg);
+    if (got != (void*)kOriginsBeg) return false;
+  }
+  return true;
+}
+
+void MsanDie() {
+  _exit(flags()->exit_code);  // _exit(): atexit handlers are not run.
+}
+
+static void MsanAtExit(void) {
+  if (msan_report_count <= 0) return;
+  // Warnings were reported: print the count, then force exit_code if set.
+  ReportAtExitStatistics();
+  if (flags()->exit_code)
+    _exit(flags()->exit_code);
+}
+
+void InstallAtExitHandler() {
+  atexit(MsanAtExit);  // The return value of atexit() is not checked.
+}
+
+void UnpoisonMappedDSO(link_map *map) {  // map is dereferenced unchecked.
+  typedef ElfW(Phdr) Elf_Phdr;
+  typedef ElfW(Ehdr) Elf_Ehdr;
+  char *base = (char *)map->l_addr;
+  Elf_Ehdr *ehdr = (Elf_Ehdr *)base;  // Reads an ELF header at l_addr.
+  char *phdrs = base + ehdr->e_phoff;
+  char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize;
+
+  // Find the segment with the minimum base so we can "relocate" the p_vaddr
+  // fields.  Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC
+  // objects have a non-zero base.
+  uptr preferred_base = ~0ULL;  // Stays all-ones if no PT_LOAD is found.
+  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
+    Elf_Phdr *phdr = (Elf_Phdr *)iter;
+    if (phdr->p_type == PT_LOAD)
+      preferred_base = std::min(preferred_base, (uptr)phdr->p_vaddr);
+  }
+
+  // Compute the delta from the real base to get a relocation delta.
+  sptr delta = (uptr)base - preferred_base;
+  // Now we can figure out what the loader really mapped.
+  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
+    Elf_Phdr *phdr = (Elf_Phdr *)iter;
+    if (phdr->p_type == PT_LOAD) {
+      uptr seg_start = phdr->p_vaddr + delta;
+      uptr seg_end = seg_start + phdr->p_memsz;
+      // None of these values are aligned.  We consider the ragged edges of the
+      // load command as defined, since they are mapped from the file.
+      seg_start = RoundDownTo(seg_start, GetPageSizeCached());
+      seg_end = RoundUpTo(seg_end, GetPageSizeCached());
+      __msan_unpoison((void *)seg_start, seg_end - seg_start);
+    }
+  }
+}
+
+}  // namespace __msan
+
+#endif  // __linux__
diff --git a/lib/msan/msan_new_delete.cc b/lib/msan/msan_new_delete.cc
new file mode 100644
index 0000000..c4efe2e
--- /dev/null
+++ b/lib/msan/msan_new_delete.cc
@@ -0,0 +1,51 @@
+//===-- msan_new_delete.cc ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// Interceptors for operators new and delete.
+//===----------------------------------------------------------------------===//
+
+#include "msan.h"
+
+#include <stddef.h>
+
+namespace __msan {
+// This function is a no-op. We need it to make sure that object file
+// with our replacements will actually be loaded from static MSan
+// run-time library at link-time.
+void ReplaceOperatorsNewAndDelete() { }
+}  // namespace __msan
+
+using namespace __msan;  // NOLINT
+
+// Fake std::nothrow_t to avoid including <new>.
+namespace std {
+  struct nothrow_t {};  // Empty tag; only its type is used in the overloads.
+}  // namespace std
+
+// All four operator new forms below share one body built on MsanReallocate.
+#define OPERATOR_NEW_BODY \
+  GET_MALLOC_STACK_TRACE; \
+  return MsanReallocate(&stack, 0, size, sizeof(u64), false)  // No final ';'.
+
+void *operator new(size_t size) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size) { OPERATOR_NEW_BODY; }
+void *operator new(size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
+
+#define OPERATOR_DELETE_BODY \
+  if (ptr) MsanDeallocate(ptr)  // Null pointers are ignored.
+
+void operator delete(void *ptr) { OPERATOR_DELETE_BODY; }
+void operator delete[](void *ptr) { OPERATOR_DELETE_BODY; }
+void operator delete(void *ptr, std::nothrow_t const&) { OPERATOR_DELETE_BODY; }
+void operator delete[](void *ptr, std::nothrow_t const&) {
+  OPERATOR_DELETE_BODY;
+}
diff --git a/lib/msan/msan_report.cc b/lib/msan/msan_report.cc
new file mode 100644
index 0000000..df6990f
--- /dev/null
+++ b/lib/msan/msan_report.cc
@@ -0,0 +1,120 @@
+//===-- msan_report.cc ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// Error reporting.
+//===----------------------------------------------------------------------===//
+
+#include "msan.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "sanitizer_common/sanitizer_report_decorator.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
+
+using namespace __sanitizer;
+
+static StaticSpinMutex report_mu;
+
+namespace __msan {
+
+static bool PrintsToTtyCached() {
+  // Computed on first use; unsynchronized since only one thread prints.
+  static bool is_cached = false;
+  static bool is_tty;
+  if (is_cached) return is_tty;
+  is_tty = PrintsToTty();
+  is_cached = true;
+  return is_tty;
+}
+
+class Decorator: private __sanitizer::AnsiColorDecorator {  // Report colors.
+ public:
+  Decorator() : __sanitizer::AnsiColorDecorator(PrintsToTtyCached()) { }
+  const char *Warning()    { return Red(); }  // Warning banner lines.
+  const char *Origin()     { return Magenta(); }  // Origin descriptions.
+  const char *Name()   { return Green(); }  // Quoted names in origins.
+  const char *End()    { return Default(); }  // Back to the default color.
+};
+
+static void PrintStack(const uptr *trace, uptr size) {
+  SymbolizerScope sym_scope;  // Stays alive for the duration of the print.
+  StackTrace::PrintStack(trace, size, true, flags()->strip_path_prefix, 0);
+}
+
+static void DescribeOrigin(u32 origin) {
+  Decorator d;
+  if (flags()->verbosity)
+    Printf("  raw origin id: %u\n", origin);  // u32: print as unsigned.
+  if (const char *so = __msan_get_origin_descr_if_stack(origin)) {
+    char* s = internal_strdup(so);  // Private copy: it is split in place.
+    char* sep = internal_strchr(s, '@');
+    CHECK(sep);
+    *sep = '\0';  // s: text before '@' (allocation); sep + 1: function.
+    Printf("%s", d.Origin());
+    Printf("  %sUninitialized value was created by an allocation of '%s%s%s'"
+           " in the stack frame of function '%s%s%s'%s\n",
+           d.Origin(), d.Name(), s, d.Origin(), d.Name(), sep + 1,
+           d.Origin(), d.End());
+    InternalFree(s);
+  } else {
+    uptr size = 0;
+    const uptr *trace = StackDepotGet(origin, &size);  // Origin is a depot id.
+    Printf("  %sUninitialized value was created by a heap allocation%s\n",
+           d.Origin(), d.End());
+    PrintStack(trace, size);
+  }
+}
+
+static void ReportSummary(const char *error_type, StackTrace *stack) {
+  if (stack->size == 0 || !IsSymbolizerAvailable()) return;
+  const uptr top_pc = StackTrace::GetPreviousInstructionPc(stack->trace[0]);
+  AddressInfo top_frame;  // Filled in by SymbolizeCode below.
+  {
+    SymbolizerScope sym_scope;
+    SymbolizeCode(top_pc, &top_frame, 1);
+  }
+  ReportErrorSummary(
+      error_type, StripPathPrefix(top_frame.file, flags()->strip_path_prefix),
+      top_frame.line, top_frame.function);
+}
+
+void ReportUMR(StackTrace *stack, u32 origin) {
+  if (!__msan::flags()->report_umrs) return;
+
+  // report_mu is held for the rest of the report.
+  GenericScopedLock<StaticSpinMutex> lock(&report_mu);
+
+  Decorator colors;
+  Printf("%s", colors.Warning());
+  Report(" WARNING: Use of uninitialized value\n");
+  Printf("%s", colors.End());
+  PrintStack(stack->trace, stack->size);
+  if (origin)
+    DescribeOrigin(origin);
+  ReportSummary("use-of-uninitialized-value", stack);
+}
+
+void ReportExpectedUMRNotFound(StackTrace *stack) {
+  GenericScopedLock<StaticSpinMutex> lock(&report_mu);  // Same lock as UMRs.
+
+  Printf(" WARNING: Expected use of uninitialized value not found\n");
+  PrintStack(stack->trace, stack->size);
+}
+
+void ReportAtExitStatistics() {  // Does not take report_mu.
+  Decorator d;
+  Printf("%s", d.Warning());
+  Printf("MemorySanitizer: %d warnings reported.\n", msan_report_count);
+  Printf("%s", d.End());
+}
+
+
+}  // namespace __msan
diff --git a/lib/msan/tests/CMakeLists.txt b/lib/msan/tests/CMakeLists.txt
new file mode 100644
index 0000000..813aad0
--- /dev/null
+++ b/lib/msan/tests/CMakeLists.txt
@@ -0,0 +1,171 @@
+include(CheckCXXCompilerFlag)
+include(CompilerRTCompile)
+include(CompilerRTLink)
+
+include_directories(..)
+include_directories(../..)
+
+# Instrumented libcxx sources and build flags.
+set(MSAN_LIBCXX_PATH ${LLVM_MAIN_SRC_DIR}/projects/libcxx)
+file(GLOB MSAN_LIBCXX_SOURCES ${MSAN_LIBCXX_PATH}/src/*.cpp)  # configure-time
+set(MSAN_LIBCXX_CFLAGS  # passed to msan_compile for every libcxx source
+  -I${MSAN_LIBCXX_PATH}/include
+  -fsanitize=memory
+  -fsanitize-memory-track-origins
+  -fPIC
+  -Wno-\#warnings
+  -g
+  -O2
+  -std=c++0x
+  -fstrict-aliasing
+  -fno-exceptions
+  -nostdinc++
+  -fno-omit-frame-pointer
+  -mno-omit-leaf-frame-pointer)
+set(MSAN_LIBCXX_LINK_FLAGS  # used when linking the instrumented libcxx .so
+  -nodefaultlibs
+  -lpthread
+  -lrt
+  -lc
+  -lstdc++
+  -fsanitize=memory)
+
+# Unittest sources and build flags.
+set(MSAN_UNITTEST_SOURCE msan_test.cc)
+set(MSAN_LOADABLE_SOURCE msan_loadable.cc)
+set(MSAN_UNITTEST_HEADERS  # listed as DEPS of every msan_compile object
+  msandr_test_so.h
+  ../../../include/sanitizer/msan_interface.h
+)
+set(MSANDR_UNITTEST_SOURCE msandr_test_so.cc)
+set(MSAN_UNITTEST_COMMON_CFLAGS  # flags without -fsanitize=memory
+  -I${MSAN_LIBCXX_PATH}/include
+  ${COMPILER_RT_GTEST_INCLUDE_CFLAGS}
+  -I${COMPILER_RT_SOURCE_DIR}/include
+  -I${COMPILER_RT_SOURCE_DIR}/lib
+  -I${COMPILER_RT_SOURCE_DIR}/lib/msan
+  -std=c++0x
+  -stdlib=libc++
+  -fPIE
+  -g
+  -O2
+  -fno-exceptions
+  -fno-omit-frame-pointer
+  -mno-omit-leaf-frame-pointer
+)
+set(MSAN_UNITTEST_INSTRUMENTED_CFLAGS  # common flags plus MSan instrumentation
+  ${MSAN_UNITTEST_COMMON_CFLAGS}
+  -fsanitize=memory
+  -fsanitize-memory-track-origins
+  -mllvm -msan-keep-going=1
+)
+set(MSAN_UNITTEST_LINK_FLAGS
+  -fsanitize=memory
+  -pie
+  -ldl
+  # FIXME: we build libcxx without cxxabi and need libstdc++ to provide it.
+  -lstdc++
+)
+set(MSAN_LOADABLE_LINK_FLAGS  # NOTE(review): not referenced in this file.
+  -fsanitize=memory
+  -shared
+)
+
+# Compile ${source} for ${arch} with the compiler options in ${ARGN}, and
+# append the object file name to the list variable named by ${obj_list}.
+macro(msan_compile obj_list source arch)
+  get_filename_component(basename ${source} NAME)
+  set(output_obj "${basename}.${arch}.o")  # name only, no directory part
+  get_target_flags_for_arch(${arch} TARGET_CFLAGS)
+  clang_compile(${output_obj} ${source}
+                CFLAGS ${ARGN} ${TARGET_CFLAGS}
+                DEPS gtest ${MSAN_RUNTIME_LIBRARIES} ${MSAN_UNITTEST_HEADERS})
+  list(APPEND ${obj_list} ${output_obj})
+endmacro()
+
+macro(msan_link_shared so_list so_name arch)  # ARGN: OBJECTS LINKFLAGS DEPS
+  parse_arguments(SOURCE "OBJECTS;LINKFLAGS;DEPS" "" ${ARGN})
+  set(output_so "${CMAKE_CURRENT_BINARY_DIR}/${so_name}.${arch}.so")
+  get_target_flags_for_arch(${arch} TARGET_LINKFLAGS)
+  clang_link_shared(${output_so}
+                OBJECTS ${SOURCE_OBJECTS}
+                LINKFLAGS ${TARGET_LINKFLAGS} ${SOURCE_LINKFLAGS}
+                DEPS ${SOURCE_DEPS})
+  list(APPEND ${so_list} ${output_so})  # full path goes into the caller's list
+endmacro()
+
+# Link the MSan unit test ${test_name} for ${arch} from the objects in ${ARGN},
+# passing ${test_suite} and ${test_name} on to add_compiler_rt_test.
+macro(add_msan_test test_suite test_name arch)
+  get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
+  add_compiler_rt_test(${test_suite} ${test_name}
+                       OBJECTS ${ARGN}
+                       DEPS ${MSAN_RUNTIME_LIBRARIES} ${ARGN}
+                            ${MSAN_LOADABLE_SO}
+                       LINK_FLAGS ${MSAN_UNITTEST_LINK_FLAGS}
+                                  ${TARGET_LINK_FLAGS}
+                                  "-Wl,-rpath=${CMAKE_CURRENT_BINARY_DIR}")
+endmacro()
+
+# Main MemorySanitizer unit tests.
+add_custom_target(MsanUnitTests)
+set_target_properties(MsanUnitTests PROPERTIES FOLDER "MSan unit tests")
+
+# Adds MSan unit tests and benchmarks for architecture.
+macro(add_msan_tests_for_arch arch)
+  # Build gtest instrumented with MSan.
+  set(MSAN_INST_GTEST)
+  msan_compile(MSAN_INST_GTEST ${COMPILER_RT_GTEST_SOURCE} ${arch}
+                               ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
+
+  # Build libcxx instrumented with MSan.
+  set(MSAN_INST_LIBCXX_OBJECTS)
+  foreach(SOURCE ${MSAN_LIBCXX_SOURCES})
+    msan_compile(MSAN_INST_LIBCXX_OBJECTS ${SOURCE} ${arch}
+                 ${MSAN_LIBCXX_CFLAGS})
+  endforeach()
+
+  set(MSAN_INST_LIBCXX)
+  msan_link_shared(MSAN_INST_LIBCXX "libcxx" ${arch}
+                   OBJECTS ${MSAN_INST_LIBCXX_OBJECTS}
+                   LINKFLAGS ${MSAN_LIBCXX_LINK_FLAGS}
+                   DEPS ${MSAN_INST_LIBCXX_OBJECTS} ${MSAN_RUNTIME_LIBRARIES})
+
+  # Instrumented tests.
+  set(MSAN_INST_TEST_OBJECTS)
+  msan_compile(MSAN_INST_TEST_OBJECTS ${MSAN_UNITTEST_SOURCE} ${arch}
+               ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
+
+  # Instrumented loadable module objects.
+  set(MSAN_INST_LOADABLE_OBJECTS)
+  msan_compile(MSAN_INST_LOADABLE_OBJECTS ${MSAN_LOADABLE_SOURCE} ${arch}
+               ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
+
+  # Uninstrumented shared object for MSanDR tests.
+  set(MSANDR_TEST_OBJECTS)
+  msan_compile(MSANDR_TEST_OBJECTS ${MSANDR_UNITTEST_SOURCE} ${arch}
+               ${MSAN_UNITTEST_COMMON_CFLAGS})
+
+  # Instrumented loadable library tests.
+  set(MSAN_LOADABLE_SO)
+  msan_link_shared(MSAN_LOADABLE_SO "libmsan_loadable" ${arch}
+                   OBJECTS ${MSAN_INST_LOADABLE_OBJECTS}
+                   DEPS ${MSAN_INST_LOADABLE_OBJECTS} ${MSAN_RUNTIME_LIBRARIES})
+
+  # Uninstrumented shared library tests.
+  set(MSANDR_TEST_SO)
+  msan_link_shared(MSANDR_TEST_SO "libmsandr_test" ${arch}
+                   OBJECTS ${MSANDR_TEST_OBJECTS}
+                   DEPS ${MSANDR_TEST_OBJECTS} ${MSAN_RUNTIME_LIBRARIES})
+
+  # Link everything together.
+  add_msan_test(MsanUnitTests "Msan-${arch}-Test" ${arch}
+                ${MSAN_INST_TEST_OBJECTS} ${MSAN_INST_GTEST}
+                ${MSAN_INST_LIBCXX} ${MSANDR_TEST_SO})
+endmacro()
+
+if(COMPILER_RT_CAN_EXECUTE_TESTS AND EXISTS ${MSAN_LIBCXX_PATH}/)
+  if(CAN_TARGET_x86_64)  # x86_64 is the only architecture added here
+    add_msan_tests_for_arch(x86_64)
+  endif()
+endif()
diff --git a/lib/msan/tests/msan_loadable.cc b/lib/msan/tests/msan_loadable.cc
new file mode 100644
index 0000000..db3bf48
--- /dev/null
+++ b/lib/msan/tests/msan_loadable.cc
@@ -0,0 +1,45 @@
+//===-- msan_loadable.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer unit tests.
+//===----------------------------------------------------------------------===//
+
+#include "msan/msan_interface_internal.h"
+#include <stdlib.h>
+
+static void *dso_global;
+
+// C linkage: the symbols below are exported without C++ name mangling.
+extern "C" {
+
+__attribute__((constructor))  // Runs when this module is loaded.
+void loadable_module_init(void) {
+  if (!__msan_has_dynamic_component())
+    return;
+  // The real test: this compare must not be reported as an uninit use.
+  if (dso_global == NULL)
+    dso_global = malloc(4);
+}
+
+__attribute__((destructor))  // Runs when this module is unloaded.
+void loadable_module_fini(void) {
+  if (!__msan_has_dynamic_component())
+    return;
+  free(dso_global);
+  // *Don't* overwrite it with NULL!  That would unpoison it, but our test
+  // relies on reloading at the same address and keeping the poison.
+}
+
+void **get_dso_global() {  // Returns the address of dso_global itself.
+  return &dso_global;
+}
+
+}  // extern "C"
diff --git a/lib/msan/tests/msan_test.cc b/lib/msan/tests/msan_test.cc
new file mode 100644
index 0000000..c1040d5
--- /dev/null
+++ b/lib/msan/tests/msan_test.cc
@@ -0,0 +1,1851 @@
+//===-- msan_test.cc ------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer unit tests.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer/msan_interface.h"
+#include "msandr_test_so.h"
+#include "gtest/gtest.h"
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <assert.h>
+#include <wchar.h>
+
+#include <dlfcn.h>
+#include <unistd.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/utsname.h>
+#include <sys/mman.h>
+#include <sys/vfs.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+# include <emmintrin.h>
+# define MSAN_HAS_M128 1
+#else
+# define MSAN_HAS_M128 0
+#endif
+
+typedef unsigned char      U1;
+typedef unsigned short     U2;  // NOLINT
+typedef unsigned int       U4;
+typedef unsigned long long U8;  // NOLINT
+typedef   signed char      S1;
+typedef   signed short     S2;  // NOLINT
+typedef   signed int       S4;
+typedef   signed long long S8;  // NOLINT
+#define NOINLINE      __attribute__((noinline))
+#define INLINE      __attribute__((always_inline))
+
+static bool TrackingOrigins() {  // True iff a stored origin reads back.
+  S8 x;
+  __msan_set_origin(&x, sizeof(x), 0x1234);  // Probe with a marker id.
+  U4 origin = __msan_get_origin(&x);
+  __msan_set_origin(&x, sizeof(x), 0);  // Reset the probe's origin to 0.
+  return origin == 0x1234;
+}
+
+#define EXPECT_UMR(action) \
+    do {                        \
+      __msan_set_expect_umr(1); \
+      action;                   \
+      __msan_set_expect_umr(0); \
+    } while (0)
+
+#define EXPECT_UMR_O(action, origin) \
+    do {                                            \
+      __msan_set_expect_umr(1);                     \
+      action;                                       \
+      __msan_set_expect_umr(0);                     \
+      if (TrackingOrigins())                        \
+        EXPECT_EQ(origin, __msan_get_umr_origin()); \
+    } while (0)
+
+#define EXPECT_UMR_S(action, stack_origin) \
+    do {                                            \
+      __msan_set_expect_umr(1);                     \
+      action;                                       \
+      __msan_set_expect_umr(0);                     \
+      U4 id = __msan_get_umr_origin();             \
+      const char *str = __msan_get_origin_descr_if_stack(id); \
+      if (!str || strcmp(str, stack_origin)) {      \
+        fprintf(stderr, "EXPECT_UMR_S: id=%u %s, %s\n", \
+                id, stack_origin, str ? str : "(null)"); /* no NULL to %s */ \
+        EXPECT_EQ(1, 0);                            \
+      }                                             \
+    } while (0)
+
+#define EXPECT_POISONED(x) ExpectPoisoned(x)
+
+template<typename T>
+void ExpectPoisoned(const T& t) {
+  EXPECT_NE(-1, __msan_test_shadow((void*)&t, sizeof(t)));
+}
+
+#define EXPECT_POISONED_O(x, origin) \
+  ExpectPoisonedWithOrigin(x, origin)
+
+template<typename T>
+void ExpectPoisonedWithOrigin(const T& t, unsigned origin) {
+  EXPECT_NE(-1, __msan_test_shadow((void*)&t, sizeof(t)));
+  if (TrackingOrigins())
+    EXPECT_EQ(origin, __msan_get_origin((void*)&t));
+}
+
+#define EXPECT_POISONED_S(x, stack_origin) \
+  ExpectPoisonedWithStackOrigin(x, stack_origin)
+
+template<typename T>
+void ExpectPoisonedWithStackOrigin(const T& t, const char *stack_origin) {
+  EXPECT_NE(-1, __msan_test_shadow((void*)&t, sizeof(t)));
+  U4 id = __msan_get_origin((void*)&t);
+  const char *str = __msan_get_origin_descr_if_stack(id);
+  if (!str || strcmp(str, stack_origin)) {  // No description, or a mismatch.
+    fprintf(stderr, "EXPECT_POISONED_S: id=%u %s, %s\n",
+        id, stack_origin, str ? str : "(null)");  // %s must not get NULL.
+    EXPECT_EQ(1, 0);
+  }
+}
+
+#define EXPECT_NOT_POISONED(x) ExpectNotPoisoned(x)
+
+template<typename T>
+void ExpectNotPoisoned(const T& t) {
+  EXPECT_EQ(-1, __msan_test_shadow((void*)&t, sizeof(t)));
+}
+
+static U8 poisoned_array[100];
+template<class T>
+T *GetPoisoned(int i = 0, T val = 0) {
+  T *res = (T*)&poisoned_array[i];
+  *res = val;
+  __msan_poison(&poisoned_array[i], sizeof(T));
+  return res;
+}
+
+template<class T>
+T *GetPoisonedO(int i, U4 origin, T val = 0) {
+  T *res = (T*)&poisoned_array[i];
+  *res = val;
+  __msan_poison(&poisoned_array[i], sizeof(T));
+  __msan_set_origin(&poisoned_array[i], sizeof(T), origin);
+  return res;
+}
+
+// This function returns its parameter but in such a way that compiler
+// can not prove it.
+template<class T>
+NOINLINE
+static T Ident(T t) {
+  volatile T ret = t;
+  return ret;
+}
+
+template<class T> NOINLINE T ReturnPoisoned() { return *GetPoisoned<T>(); }
+
+static volatile int g_one = 1;
+static volatile int g_zero = 0;
+static volatile int g_0 = 0;
+static volatile int g_1 = 1;
+
+S4 a_s4[100];
+S8 a_s8[100];
+
+// Check that malloc poisons memory.
+// A lot of tests below depend on this.
+TEST(MemorySanitizerSanity, PoisonInMalloc) {
+  int *x = (int*)malloc(sizeof(int));
+  EXPECT_POISONED(*x);
+  free(x);
+}
+
+TEST(MemorySanitizer, NegativeTest1) {
+  S4 *x = GetPoisoned<S4>();
+  if (g_one)
+    *x = 0;
+  EXPECT_NOT_POISONED(*x);
+}
+
+TEST(MemorySanitizer, PositiveTest1) {
+  // Load to store.
+  EXPECT_POISONED(*GetPoisoned<S1>());
+  EXPECT_POISONED(*GetPoisoned<S2>());
+  EXPECT_POISONED(*GetPoisoned<S4>());
+  EXPECT_POISONED(*GetPoisoned<S8>());
+
+  // S->S conversions.
+  EXPECT_POISONED(*GetPoisoned<S1>());
+  EXPECT_POISONED(*GetPoisoned<S1>());
+  EXPECT_POISONED(*GetPoisoned<S1>());
+
+  EXPECT_POISONED(*GetPoisoned<S2>());
+  EXPECT_POISONED(*GetPoisoned<S2>());
+  EXPECT_POISONED(*GetPoisoned<S2>());
+
+  EXPECT_POISONED(*GetPoisoned<S4>());
+  EXPECT_POISONED(*GetPoisoned<S4>());
+  EXPECT_POISONED(*GetPoisoned<S4>());
+
+  EXPECT_POISONED(*GetPoisoned<S8>());
+  EXPECT_POISONED(*GetPoisoned<S8>());
+  EXPECT_POISONED(*GetPoisoned<S8>());
+
+  // ZExt
+  EXPECT_POISONED(*GetPoisoned<U1>());
+  EXPECT_POISONED(*GetPoisoned<U1>());
+  EXPECT_POISONED(*GetPoisoned<U1>());
+  EXPECT_POISONED(*GetPoisoned<U2>());
+  EXPECT_POISONED(*GetPoisoned<U2>());
+  EXPECT_POISONED(*GetPoisoned<U4>());
+
+  // Unary ops.
+  EXPECT_POISONED(- *GetPoisoned<S4>());
+
+  EXPECT_UMR(a_s4[g_zero] = 100 / *GetPoisoned<S4>(0, 1));
+
+
+  a_s4[g_zero] = 1 - *GetPoisoned<S4>();
+  a_s4[g_zero] = 1 + *GetPoisoned<S4>();
+}
+
+TEST(MemorySanitizer, Phi1) {
+  S4 c;
+  if (g_one) {
+    c = *GetPoisoned<S4>();
+  } else {
+    break_optimization(0);
+    c = 0;
+  }
+  EXPECT_POISONED(c);
+}
+
+TEST(MemorySanitizer, Phi2) {
+  S4 i = *GetPoisoned<S4>();
+  S4 n = g_one;
+  EXPECT_UMR(for (; i < g_one; i++););
+  EXPECT_POISONED(i);
+}
+
+NOINLINE void Arg1ExpectUMR(S4 a1) { EXPECT_POISONED(a1); }
+NOINLINE void Arg2ExpectUMR(S4 a1, S4 a2) { EXPECT_POISONED(a2); }
+NOINLINE void Arg3ExpectUMR(S1 a1, S4 a2, S8 a3) { EXPECT_POISONED(a3); }
+
+TEST(MemorySanitizer, ArgTest) {
+  Arg1ExpectUMR(*GetPoisoned<S4>());
+  Arg2ExpectUMR(0, *GetPoisoned<S4>());
+  Arg3ExpectUMR(0, 1, *GetPoisoned<S8>());
+}
+
+
+TEST(MemorySanitizer, CallAndRet) {
+  if (!__msan_has_dynamic_component()) return;
+  ReturnPoisoned<S1>();
+  ReturnPoisoned<S2>();
+  ReturnPoisoned<S4>();
+  ReturnPoisoned<S8>();
+
+  EXPECT_POISONED(ReturnPoisoned<S1>());
+  EXPECT_POISONED(ReturnPoisoned<S2>());
+  EXPECT_POISONED(ReturnPoisoned<S4>());
+  EXPECT_POISONED(ReturnPoisoned<S8>());
+}
+
+// malloc() in the following test may be optimized to produce a compile-time
+// undef value. Check that we trap on the volatile assignment anyway.
+TEST(MemorySanitizer, DISABLED_MallocNoIdent) {
+  S4 *x = (int*)malloc(sizeof(S4));
+  EXPECT_POISONED(*x);
+  free(x);
+}
+
+TEST(MemorySanitizer, Malloc) {
+  S4 *x = (int*)Ident(malloc(sizeof(S4)));
+  EXPECT_POISONED(*x);
+  free(x);
+}
+
+TEST(MemorySanitizer, Realloc) {
+  S4 *x = (int*)Ident(realloc(0, sizeof(S4)));
+  EXPECT_POISONED(x[0]);
+  x[0] = 1;
+  x = (int*)Ident(realloc(x, 2 * sizeof(S4)));
+  EXPECT_NOT_POISONED(x[0]);  // Ok, was inited before.
+  EXPECT_POISONED(x[1]);
+  x = (int*)Ident(realloc(x, 3 * sizeof(S4)));
+  EXPECT_NOT_POISONED(x[0]);  // Ok, was inited before.
+  EXPECT_POISONED(x[2]);
+  EXPECT_POISONED(x[1]);
+  x[2] = 1;  // Init this here. Check that after realloc it is poisoned again.
+  x = (int*)Ident(realloc(x, 2 * sizeof(S4)));
+  EXPECT_NOT_POISONED(x[0]);  // Ok, was inited before.
+  EXPECT_POISONED(x[1]);
+  x = (int*)Ident(realloc(x, 3 * sizeof(S4)));
+  EXPECT_POISONED(x[1]);
+  EXPECT_POISONED(x[2]);
+  free(x);
+}
+
+TEST(MemorySanitizer, Calloc) {
+  S4 *x = (int*)Ident(calloc(1, sizeof(S4)));
+  EXPECT_NOT_POISONED(*x);  // Should not be poisoned.
+  // EXPECT_EQ(0, *x);
+  free(x);
+}
+
+TEST(MemorySanitizer, AndOr) {
+  U4 *p = GetPoisoned<U4>();
+  // We initialize (unpoison) the two middle bytes of a poisoned 4-byte word
+  // so that the test is correct regardless of endianness.
+  ((U1*)p)[1] = 0;
+  ((U1*)p)[2] = 0xff;
+  EXPECT_NOT_POISONED(*p & 0x00ffff00);
+  EXPECT_NOT_POISONED(*p & 0x00ff0000);
+  EXPECT_NOT_POISONED(*p & 0x0000ff00);
+  EXPECT_POISONED(*p & 0xff000000);
+  EXPECT_POISONED(*p & 0x000000ff);
+  EXPECT_POISONED(*p & 0x0000ffff);
+  EXPECT_POISONED(*p & 0xffff0000);
+
+  EXPECT_NOT_POISONED(*p | 0xff0000ff);
+  EXPECT_NOT_POISONED(*p | 0xff00ffff);
+  EXPECT_NOT_POISONED(*p | 0xffff00ff);
+  EXPECT_POISONED(*p | 0xff000000);
+  EXPECT_POISONED(*p | 0x000000ff);
+  EXPECT_POISONED(*p | 0x0000ffff);
+  EXPECT_POISONED(*p | 0xffff0000);
+
+  EXPECT_POISONED(*GetPoisoned<bool>() & *GetPoisoned<bool>());
+}
+
+template<class T>
+static bool applyNot(T value, T shadow) {
+  __msan_partial_poison(&value, &shadow, sizeof(T));
+  return !value;
+}
+
+TEST(MemorySanitizer, Not) {
+  EXPECT_NOT_POISONED(applyNot<U4>(0x0, 0x0));
+  EXPECT_NOT_POISONED(applyNot<U4>(0xFFFFFFFF, 0x0));
+  EXPECT_POISONED(applyNot<U4>(0xFFFFFFFF, 0xFFFFFFFF));
+  EXPECT_NOT_POISONED(applyNot<U4>(0xFF000000, 0x0FFFFFFF));
+  EXPECT_NOT_POISONED(applyNot<U4>(0xFF000000, 0x00FFFFFF));
+  EXPECT_NOT_POISONED(applyNot<U4>(0xFF000000, 0x0000FFFF));
+  EXPECT_NOT_POISONED(applyNot<U4>(0xFF000000, 0x00000000));
+  EXPECT_POISONED(applyNot<U4>(0xFF000000, 0xFF000000));
+  EXPECT_NOT_POISONED(applyNot<U4>(0xFF800000, 0xFF000000));
+  EXPECT_POISONED(applyNot<U4>(0x00008000, 0x00008000));
+
+  EXPECT_NOT_POISONED(applyNot<U1>(0x0, 0x0));
+  EXPECT_NOT_POISONED(applyNot<U1>(0xFF, 0xFE));
+  EXPECT_NOT_POISONED(applyNot<U1>(0xFF, 0x0));
+  EXPECT_POISONED(applyNot<U1>(0xFF, 0xFF));
+
+  EXPECT_POISONED(applyNot<void*>((void*)0xFFFFFF, (void*)(-1)));
+  EXPECT_NOT_POISONED(applyNot<void*>((void*)0xFFFFFF, (void*)(-2)));
+}
+
+TEST(MemorySanitizer, Shift) {
+  U4 *up = GetPoisoned<U4>();
+  ((U1*)up)[0] = 0;
+  ((U1*)up)[3] = 0xff;
+  EXPECT_NOT_POISONED(*up >> 30);
+  EXPECT_NOT_POISONED(*up >> 24);
+  EXPECT_POISONED(*up >> 23);
+  EXPECT_POISONED(*up >> 10);
+
+  EXPECT_NOT_POISONED(*up << 30);
+  EXPECT_NOT_POISONED(*up << 24);
+  EXPECT_POISONED(*up << 23);
+  EXPECT_POISONED(*up << 10);
+
+  S4 *sp = (S4*)up;
+  EXPECT_NOT_POISONED(*sp >> 30);
+  EXPECT_NOT_POISONED(*sp >> 24);
+  EXPECT_POISONED(*sp >> 23);
+  EXPECT_POISONED(*sp >> 10);
+
+  sp = GetPoisoned<S4>();
+  ((S1*)sp)[1] = 0;
+  ((S1*)sp)[2] = 0;
+  EXPECT_POISONED(*sp >> 31);
+
+  EXPECT_POISONED(100 >> *GetPoisoned<S4>());
+  EXPECT_POISONED(100U >> *GetPoisoned<S4>());
+}
+
+NOINLINE static int GetPoisonedZero() {
+  int *zero = new int;
+  *zero = 0;
+  __msan_poison(zero, sizeof(*zero));
+  int res = *zero;
+  delete zero;
+  return res;
+}
+
+TEST(MemorySanitizer, LoadFromDirtyAddress) {
+  int *a = new int;
+  *a = 0;
+  EXPECT_UMR(break_optimization((void*)(U8)a[GetPoisonedZero()]));
+  delete a;
+}
+
+TEST(MemorySanitizer, StoreToDirtyAddress) {
+  int *a = new int;
+  EXPECT_UMR(a[GetPoisonedZero()] = 0);
+  break_optimization(a);
+  delete a;
+}
+
+
+NOINLINE void StackTestFunc() {
+  S4 p4;
+  S4 ok4 = 1;
+  S2 p2;
+  S2 ok2 = 1;
+  S1 p1;
+  S1 ok1 = 1;
+  break_optimization(&p4);
+  break_optimization(&ok4);
+  break_optimization(&p2);
+  break_optimization(&ok2);
+  break_optimization(&p1);
+  break_optimization(&ok1);
+
+  EXPECT_POISONED(p4);
+  EXPECT_POISONED(p2);
+  EXPECT_POISONED(p1);
+  EXPECT_NOT_POISONED(ok1);
+  EXPECT_NOT_POISONED(ok2);
+  EXPECT_NOT_POISONED(ok4);
+}
+
+TEST(MemorySanitizer, StackTest) {
+  StackTestFunc();
+}
+
+NOINLINE void StackStressFunc() {
+  int foo[10000];
+  break_optimization(foo);
+}
+
+TEST(MemorySanitizer, DISABLED_StackStressTest) {
+  for (int i = 0; i < 1000000; i++)
+    StackStressFunc();
+}
+
+template<class T>
+void TestFloatingPoint() {
+  static volatile T v;
+  static T g[100];
+  break_optimization(&g);
+  T *x = GetPoisoned<T>();
+  T *y = GetPoisoned<T>(1);
+  EXPECT_POISONED(*x);
+  EXPECT_POISONED((long long)*x);
+  EXPECT_POISONED((int)*x);
+  g[0] = *x;
+  g[1] = *x + *y;
+  g[2] = *x - *y;
+  g[3] = *x * *y;
+}
+
+TEST(MemorySanitizer, FloatingPointTest) {
+  TestFloatingPoint<float>();
+  TestFloatingPoint<double>();
+}
+
+TEST(MemorySanitizer, DynMem) {
+  S4 x = 0;
+  S4 *y = GetPoisoned<S4>();
+  memcpy(y, &x, g_one * sizeof(S4));
+  EXPECT_NOT_POISONED(*y);
+}
+
+static char *DynRetTestStr;
+
+TEST(MemorySanitizer, DynRet) {
+  if (!__msan_has_dynamic_component()) return;
+  ReturnPoisoned<S8>();
+  EXPECT_NOT_POISONED(clearenv());
+}
+
+
+TEST(MemorySanitizer, DynRet1) {
+  if (!__msan_has_dynamic_component()) return;
+  ReturnPoisoned<S8>();
+}
+
+struct LargeStruct {
+  S4 x[10];
+};
+
+NOINLINE
+LargeStruct LargeRetTest() {
+  LargeStruct res;
+  res.x[0] = *GetPoisoned<S4>();
+  res.x[1] = *GetPoisoned<S4>();
+  res.x[2] = *GetPoisoned<S4>();
+  res.x[3] = *GetPoisoned<S4>();
+  res.x[4] = *GetPoisoned<S4>();
+  res.x[5] = *GetPoisoned<S4>();
+  res.x[6] = *GetPoisoned<S4>();
+  res.x[7] = *GetPoisoned<S4>();
+  res.x[8] = *GetPoisoned<S4>();
+  res.x[9] = *GetPoisoned<S4>();
+  return res;
+}
+
+TEST(MemorySanitizer, LargeRet) {
+  LargeStruct a = LargeRetTest();
+  EXPECT_POISONED(a.x[0]);
+  EXPECT_POISONED(a.x[9]);
+}
+
+TEST(MemorySanitizer, fread) {
+  char *x = new char[32];  // Heap buffer that fread() is expected to fill.
+  FILE *f = fopen("/proc/self/stat", "r");
+  assert(f);
+  fread(x, 1, 32, f);
+  EXPECT_NOT_POISONED(x[0]);
+  EXPECT_NOT_POISONED(x[16]);
+  EXPECT_NOT_POISONED(x[31]);
+  fclose(f);
+  delete [] x;  // Allocated with new[], so it must be freed with delete[].
+}
+
+TEST(MemorySanitizer, read) {
+  char *x = new char[32];  // Heap buffer that read() is expected to fill.
+  int fd = open("/proc/self/stat", O_RDONLY);
+  assert(fd >= 0);  // open() reports failure as -1; 0 is a valid descriptor.
+  int sz = read(fd, x, 32);
+  assert(sz == 32);
+  EXPECT_NOT_POISONED(x[0]);
+  EXPECT_NOT_POISONED(x[16]);
+  EXPECT_NOT_POISONED(x[31]);
+  close(fd);
+  delete [] x;  // Allocated with new[], so it must be freed with delete[].
+}
+
+TEST(MemorySanitizer, pread) {
+  char *x = new char[32];  // Heap buffer that pread() is expected to fill.
+  int fd = open("/proc/self/stat", O_RDONLY);
+  assert(fd >= 0);  // open() reports failure as -1; 0 is a valid descriptor.
+  int sz = pread(fd, x, 32, 0);
+  assert(sz == 32);
+  EXPECT_NOT_POISONED(x[0]);
+  EXPECT_NOT_POISONED(x[16]);
+  EXPECT_NOT_POISONED(x[31]);
+  close(fd);
+  delete [] x;  // Allocated with new[], so it must be freed with delete[].
+}
+
+// FIXME: fails now.
+TEST(MemorySanitizer, DISABLED_ioctl) {
+  struct winsize ws;
+  EXPECT_EQ(ioctl(2, TIOCGWINSZ, &ws), 0);
+  EXPECT_NOT_POISONED(ws.ws_col);
+}
+
+TEST(MemorySanitizer, readlink) {
+  char *x = new char[1000];
+  readlink("/proc/self/exe", x, 1000);
+  EXPECT_NOT_POISONED(x[0]);
+  delete [] x;
+}
+
+
+TEST(MemorySanitizer, stat) {
+  struct stat* st = new struct stat;
+  int res = stat("/proc/self/stat", st);
+  assert(!res);
+  EXPECT_NOT_POISONED(st->st_dev);
+  EXPECT_NOT_POISONED(st->st_mode);
+  EXPECT_NOT_POISONED(st->st_size);
+}
+
+TEST(MemorySanitizer, statfs) {
+  struct statfs* st = new struct statfs;
+  int res = statfs("/", st);
+  assert(!res);
+  EXPECT_NOT_POISONED(st->f_type);
+  EXPECT_NOT_POISONED(st->f_bfree);
+  EXPECT_NOT_POISONED(st->f_namelen);
+}
+
+TEST(MemorySanitizer, pipe) {
+  int* pipefd = new int[2];
+  int res = pipe(pipefd);
+  assert(!res);
+  EXPECT_NOT_POISONED(pipefd[0]);
+  EXPECT_NOT_POISONED(pipefd[1]);
+  close(pipefd[0]);
+  close(pipefd[1]);
+}
+
+TEST(MemorySanitizer, getcwd) {
+  char path[PATH_MAX + 1];
+  char* res = getcwd(path, sizeof(path));
+  assert(res);
+  EXPECT_NOT_POISONED(path[0]);
+}
+
+TEST(MemorySanitizer, getcwd_gnu) {
+  char* res = getcwd(NULL, 0);
+  assert(res);
+  EXPECT_NOT_POISONED(res[0]);
+  free(res);
+}
+
+TEST(MemorySanitizer, realpath) {
+  const char* relpath = ".";
+  char path[PATH_MAX + 1];
+  char* res = realpath(relpath, path);
+  assert(res);
+  EXPECT_NOT_POISONED(path[0]);
+}
+
+TEST(MemorySanitizer, memcpy) {
+  char* x = new char[2];
+  char* y = new char[2];
+  x[0] = 1;
+  x[1] = *GetPoisoned<char>();
+  memcpy(y, x, 2);
+  EXPECT_NOT_POISONED(y[0]);
+  EXPECT_POISONED(y[1]);
+}
+
+TEST(MemorySanitizer, memmove) {
+  char* x = new char[2];
+  char* y = new char[2];
+  x[0] = 1;
+  x[1] = *GetPoisoned<char>();
+  memmove(y, x, 2);
+  EXPECT_NOT_POISONED(y[0]);
+  EXPECT_POISONED(y[1]);
+}
+
+TEST(MemorySanitizer, strdup) {
+  char *x = strdup("zzz");
+  EXPECT_NOT_POISONED(*x);
+  free(x);
+}
+
+template<class T, int size>
+void TestOverlapMemmove() {
+  T *x = new T[size];
+  assert(size >= 3);
+  x[2] = 0;  // The only element initialized before the move.
+  memmove(x, x + 1, (size - 1) * sizeof(T));  // Shift down by one element.
+  EXPECT_NOT_POISONED(x[1]);
+  if (!__msan_has_dynamic_component()) {
+    // FIXME: under DR we will lose this information
+    // because accesses in memmove will unpoison the shadow.
+    // We need to use our own memmove implementation instead of libc's.
+    EXPECT_POISONED(x[0]);
+    EXPECT_POISONED(x[2]);
+  }
+  delete [] x;
+}
+
+TEST(MemorySanitizer, overlap_memmove) {
+  TestOverlapMemmove<U1, 10>();
+  TestOverlapMemmove<U1, 1000>();
+  TestOverlapMemmove<U8, 4>();
+  TestOverlapMemmove<U8, 1000>();
+}
+
+TEST(MemorySanitizer, strcpy) {  // NOLINT
+  char* x = new char[3];
+  char* y = new char[3];
+  x[0] = 'a';
+  x[1] = *GetPoisoned<char>(1, 1);
+  x[2] = 0;
+  strcpy(y, x);  // NOLINT
+  EXPECT_NOT_POISONED(y[0]);
+  EXPECT_POISONED(y[1]);
+  EXPECT_NOT_POISONED(y[2]);
+}
+
+TEST(MemorySanitizer, strncpy) {  // NOLINT
+  char* x = new char[3];
+  char* y = new char[3];
+  x[0] = 'a';
+  x[1] = *GetPoisoned<char>(1, 1);
+  x[2] = 0;
+  strncpy(y, x, 2);  // NOLINT
+  EXPECT_NOT_POISONED(y[0]);
+  EXPECT_POISONED(y[1]);
+  EXPECT_POISONED(y[2]);
+}
+
+TEST(MemorySanitizer, strtol) {
+  char *e;  // End pointer that strtol() must initialize.
+  ASSERT_EQ(1, strtol("1", &e, 10));  // Must run even with NDEBUG.
+  EXPECT_NOT_POISONED((S8) e);
+}
+
+TEST(MemorySanitizer, strtoll) {
+  char *e;  // End pointer that strtoll() must initialize.
+  ASSERT_EQ(1, strtoll("1", &e, 10));  // Must run even with NDEBUG.
+  EXPECT_NOT_POISONED((S8) e);
+}
+
+TEST(MemorySanitizer, strtoul) {
+  char *e;  // End pointer that strtoul() must initialize.
+  ASSERT_EQ(1UL, strtoul("1", &e, 10));  // Must run even with NDEBUG.
+  EXPECT_NOT_POISONED((S8) e);
+}
+
+TEST(MemorySanitizer, strtoull) {
+  char *e;  // End pointer that strtoull() must initialize.
+  ASSERT_EQ(1ULL, strtoull("1", &e, 10));  // Must run even with NDEBUG.
+  EXPECT_NOT_POISONED((S8) e);
+}
+
+TEST(MemorySanitizer, strtod) {
+  char *e;  // End pointer that strtod() must initialize.
+  ASSERT_NE(0.0, strtod("1.5", &e));  // Must run even with NDEBUG.
+  EXPECT_NOT_POISONED((S8) e);
+}
+
+TEST(MemorySanitizer, strtof) {
+  char *e;  // End pointer that strtof() must initialize.
+  ASSERT_NE(0.0f, strtof("1.5", &e));  // Must run even with NDEBUG.
+  EXPECT_NOT_POISONED((S8) e);
+}
+
+TEST(MemorySanitizer, strtold) {
+  char *e;  // End pointer that strtold() must initialize.
+  ASSERT_NE(0.0L, strtold("1.5", &e));  // Must run even with NDEBUG.
+  EXPECT_NOT_POISONED((S8) e);
+}
+
+TEST(MemorySanitizer, sprintf) {  // NOLINT
+  char buff[10];
+  break_optimization(buff);
+  EXPECT_POISONED(buff[0]);
+  int res = sprintf(buff, "%d", 1234567);  // NOLINT
+  assert(res == 7);
+  assert(buff[0] == '1');
+  assert(buff[1] == '2');
+  assert(buff[2] == '3');
+  assert(buff[6] == '7');
+  assert(buff[7] == 0);
+  EXPECT_POISONED(buff[8]);
+}
+
+TEST(MemorySanitizer, snprintf) {
+  char buff[10];
+  break_optimization(buff);
+  EXPECT_POISONED(buff[0]);
+  int res = snprintf(buff, sizeof(buff), "%d", 1234567);
+  assert(res == 7);
+  assert(buff[0] == '1');
+  assert(buff[1] == '2');
+  assert(buff[2] == '3');
+  assert(buff[6] == '7');
+  assert(buff[7] == 0);
+  EXPECT_POISONED(buff[8]);
+}
+
+TEST(MemorySanitizer, swprintf) {
+  wchar_t buff[10];
+  assert(sizeof(wchar_t) == 4);
+  break_optimization(buff);
+  EXPECT_POISONED(buff[0]);
+  int res = swprintf(buff, 9, L"%d", 1234567);
+  assert(res == 7);
+  assert(buff[0] == '1');
+  assert(buff[1] == '2');
+  assert(buff[2] == '3');
+  assert(buff[6] == '7');
+  assert(buff[7] == 0);
+  EXPECT_POISONED(buff[8]);
+}
+
+TEST(MemorySanitizer, wcstombs) {
+  const wchar_t *x = L"abc";
+  char buff[10];
+  int res = wcstombs(buff, x, 4);
+  EXPECT_EQ(res, 3);
+  EXPECT_EQ(buff[0], 'a');
+  EXPECT_EQ(buff[1], 'b');
+  EXPECT_EQ(buff[2], 'c');
+}
+
+TEST(MemorySanitizer, gettimeofday) {
+  struct timeval tv;
+  struct timezone tz;
+  break_optimization(&tv);
+  break_optimization(&tz);
+  assert(sizeof(tv) == 16);
+  assert(sizeof(tz) == 8);
+  EXPECT_POISONED(tv.tv_sec);  // Both structs start out uninitialized.
+  EXPECT_POISONED(tv.tv_usec);
+  EXPECT_POISONED(tz.tz_minuteswest);
+  EXPECT_POISONED(tz.tz_dsttime);
+  ASSERT_EQ(0, gettimeofday(&tv, &tz));  // Must run even with NDEBUG.
+  EXPECT_NOT_POISONED(tv.tv_sec);
+  EXPECT_NOT_POISONED(tv.tv_usec);
+  EXPECT_NOT_POISONED(tz.tz_minuteswest);
+  EXPECT_NOT_POISONED(tz.tz_dsttime);
+}
+
+TEST(MemorySanitizer, localtime) {
+  time_t t = 123;
+  struct tm *time = localtime(&t);
+  assert(time != 0);
+  EXPECT_NOT_POISONED(time->tm_sec);
+  EXPECT_NOT_POISONED(time->tm_hour);
+  EXPECT_NOT_POISONED(time->tm_year);
+  EXPECT_NOT_POISONED(time->tm_isdst);
+}
+
+TEST(MemorySanitizer, localtime_r) {
+  time_t t = 123;
+  struct tm time;
+  struct tm *res = localtime_r(&t, &time);
+  assert(res != 0);
+  EXPECT_NOT_POISONED(time.tm_sec);
+  EXPECT_NOT_POISONED(time.tm_hour);
+  EXPECT_NOT_POISONED(time.tm_year);
+  EXPECT_NOT_POISONED(time.tm_isdst);
+}
+
+TEST(MemorySanitizer, mmap) {
+  const int size = 4096;
+  void *p1, *p2;
+  p1 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+  __msan_poison(p1, size);
+  munmap(p1, size);
+  for (int i = 0; i < 1000; i++) {
+    p2 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+    if (p2 == p1)
+      break;
+    else
+      munmap(p2, size);
+  }
+  if (p1 == p2) {
+    EXPECT_NOT_POISONED(*(char*)p2);
+    munmap(p2, size);
+  }
+}
+
+// FIXME: enable and add ecvt.
+// FIXME: check why msandr does not handle fcvt.
+TEST(MemorySanitizer, fcvt) {
+  int a, b;  // Output parameters passed to fcvt() below.
+  break_optimization(&a);
+  break_optimization(&b);
+  EXPECT_POISONED(a);
+  EXPECT_POISONED(b);
+  char *str = fcvt(12345.6789, 10, &a, &b);  // Return value is not checked.
+  EXPECT_NOT_POISONED(a);
+  EXPECT_NOT_POISONED(b);
+}
+
+struct StructWithDtor {
+  ~StructWithDtor();
+};
+
+NOINLINE StructWithDtor::~StructWithDtor() {
+  break_optimization(0);
+}
+
+TEST(MemorySanitizer, Invoke) {
+  StructWithDtor s;  // Will cause the calls to become invokes.
+  EXPECT_NOT_POISONED(0);
+  EXPECT_POISONED(*GetPoisoned<int>());
+  EXPECT_NOT_POISONED(0);
+  EXPECT_POISONED(*GetPoisoned<int>());
+  EXPECT_POISONED(ReturnPoisoned<S4>());
+}
+
+TEST(MemorySanitizer, ptrtoint) {
+  // Test that shadow is propagated through pointer-to-integer conversion.
+  void* p = (void*)0xABCD;
+  __msan_poison(((char*)&p) + 1, sizeof(p) - 1);  // All but the first byte.
+  EXPECT_NOT_POISONED((((uintptr_t)p) & 0xFF) == 0);
+
+  void* q = (void*)0xABCD;
+  __msan_poison(&q, sizeof(q) - 1);  // All but the last byte.
+  EXPECT_POISONED((((uintptr_t)q) & 0xFF) == 0);
+}
+
+static void vaargsfn2(int guard, ...) {
+  va_list vl;
+  va_start(vl, guard);
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, double));
+  va_end(vl);
+}
+
+static void vaargsfn(int guard, ...) {
+  va_list vl;
+  va_start(vl, guard);
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+  // The following call will overwrite __msan_param_tls.
+  // Checks after it test that arg shadow was somehow saved across the call.
+  vaargsfn2(1, 2, 3, 4, *GetPoisoned<double>());
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+  va_end(vl);
+}
+
+TEST(MemorySanitizer, VAArgTest) {
+  int* x = GetPoisoned<int>();
+  int* y = GetPoisoned<int>(4);
+  vaargsfn(1, 13, *x, 42, *y);
+}
+
+static void vaargsfn_many(int guard, ...) {
+  va_list vl;
+  va_start(vl, guard);
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+  va_end(vl);
+}
+
+TEST(MemorySanitizer, VAArgManyTest) {
+  int* x = GetPoisoned<int>();
+  int* y = GetPoisoned<int>(4);
+  vaargsfn_many(1, 2, *x, 3, 4, 5, 6, 7, 8, 9, *y);
+}
+
+static void vaargsfn_pass2(va_list vl) {
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+}
+
+static void vaargsfn_pass(int guard, ...) {
+  va_list vl;
+  va_start(vl, guard);
+  EXPECT_POISONED(va_arg(vl, int));
+  vaargsfn_pass2(vl);
+  va_end(vl);
+}
+
+TEST(MemorySanitizer, VAArgPass) {
+  int* x = GetPoisoned<int>();
+  int* y = GetPoisoned<int>(4);
+  vaargsfn_pass(1, *x, 2, 3, *y);
+}
+
+static void vaargsfn_copy2(va_list vl) {
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+}
+
+static void vaargsfn_copy(int guard, ...) {
+  va_list vl;
+  va_start(vl, guard);
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+  va_list vl2;
+  va_copy(vl2, vl);
+  vaargsfn_copy2(vl2);
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+  va_end(vl);
+}
+
+TEST(MemorySanitizer, VAArgCopy) {
+  int* x = GetPoisoned<int>();
+  int* y = GetPoisoned<int>(4);
+  vaargsfn_copy(1, 2, *x, 3, *y);
+}
+
+static void vaargsfn_ptr(int guard, ...) {
+  va_list vl;
+  va_start(vl, guard);
+  EXPECT_NOT_POISONED(va_arg(vl, int*));
+  EXPECT_POISONED(va_arg(vl, int*));
+  EXPECT_NOT_POISONED(va_arg(vl, int*));
+  EXPECT_POISONED(va_arg(vl, double*));
+  va_end(vl);
+}
+
+TEST(MemorySanitizer, VAArgPtr) {
+  int** x = GetPoisoned<int*>();
+  double** y = GetPoisoned<double*>(8);
+  int z;
+  vaargsfn_ptr(1, &z, *x, &z, *y);
+}
+
+static void vaargsfn_overflow(int guard, ...) {
+  va_list vl;
+  va_start(vl, guard);
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+
+  EXPECT_NOT_POISONED(va_arg(vl, double));
+  EXPECT_NOT_POISONED(va_arg(vl, double));
+  EXPECT_NOT_POISONED(va_arg(vl, double));
+  EXPECT_POISONED(va_arg(vl, double));
+  EXPECT_NOT_POISONED(va_arg(vl, double));
+  EXPECT_POISONED(va_arg(vl, int*));
+  EXPECT_NOT_POISONED(va_arg(vl, double));
+  EXPECT_NOT_POISONED(va_arg(vl, double));
+
+  EXPECT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, double));
+  EXPECT_POISONED(va_arg(vl, int*));
+
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_NOT_POISONED(va_arg(vl, double));
+  EXPECT_NOT_POISONED(va_arg(vl, int*));
+
+  EXPECT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, double));
+  EXPECT_POISONED(va_arg(vl, int*));
+
+  va_end(vl);
+}
+
+TEST(MemorySanitizer, VAArgOverflow) {
+  int* x = GetPoisoned<int>();
+  double* y = GetPoisoned<double>(8);
+  int** p = GetPoisoned<int*>(16);
+  int z;
+  vaargsfn_overflow(1,
+      1, 2, *x, 4, 5, 6,
+      1.1, 2.2, 3.3, *y, 5.5, *p, 7.7, 8.8,
+      // the following args will overflow for sure
+      *x, *y, *p,
+      7, 9.9, &z,
+      *x, *y, *p);
+}
+
+static void vaargsfn_tlsoverwrite2(int guard, ...) {
+  va_list vl;
+  va_start(vl, guard);
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  va_end(vl);
+}
+
+static void vaargsfn_tlsoverwrite(int guard, ...) {
+  // This call will overwrite TLS contents unless it's backed up somewhere.
+  vaargsfn_tlsoverwrite2(2, 42);
+  va_list vl;
+  va_start(vl, guard);
+  EXPECT_POISONED(va_arg(vl, int));
+  va_end(vl);
+}
+
+TEST(MemorySanitizer, VAArgTLSOverwrite) {
+  int* x = GetPoisoned<int>();
+  vaargsfn_tlsoverwrite(1, *x);
+}
+
+struct StructByVal {
+  int a, b, c, d, e, f;
+};
+
+NOINLINE void StructByValTestFunc(struct StructByVal s) {
+  EXPECT_NOT_POISONED(s.a);
+  EXPECT_POISONED(s.b);
+  EXPECT_NOT_POISONED(s.c);
+  EXPECT_POISONED(s.d);
+  EXPECT_NOT_POISONED(s.e);
+  EXPECT_POISONED(s.f);
+}
+
+NOINLINE void StructByValTestFunc1(struct StructByVal s) {
+  StructByValTestFunc(s);
+}
+
+NOINLINE void StructByValTestFunc2(int z, struct StructByVal s) {
+  StructByValTestFunc(s);
+}
+
+TEST(MemorySanitizer, StructByVal) {
+  // Large aggregates are passed as "byval" pointer argument in LLVM.
+  struct StructByVal s;
+  s.a = 1;
+  s.b = *GetPoisoned<int>();
+  s.c = 2;
+  s.d = *GetPoisoned<int>();
+  s.e = 3;
+  s.f = *GetPoisoned<int>();
+  StructByValTestFunc(s);
+  StructByValTestFunc1(s);
+  StructByValTestFunc2(0, s);
+}
+
+
+#if MSAN_HAS_M128
+NOINLINE __m128i m128Eq(__m128i *a, __m128i *b) { return *a == *b; }
+NOINLINE __m128i m128Lt(__m128i *a, __m128i *b) { return *a < *b; }
+TEST(MemorySanitizer, m128) {
+  __m128i a = _mm_set1_epi16(0x1234);
+  __m128i b = _mm_set1_epi16(0x7890);
+  EXPECT_NOT_POISONED(m128Eq(&a, &b));
+  EXPECT_NOT_POISONED(m128Lt(&a, &b));
+}
+// FIXME: add more tests for __m128i.
+#endif  // MSAN_HAS_M128
+
+// We should not complain when copying this poisoned hole.
+struct StructWithHole {
+  U4  a;
+  // 4-byte hole.
+  U8  b;
+};
+
+NOINLINE StructWithHole ReturnStructWithHole() {
+  StructWithHole res;
+  __msan_poison(&res, sizeof(res));
+  res.a = 1;
+  res.b = 2;
+  return res;
+}
+
+TEST(MemorySanitizer, StructWithHole) {
+  StructWithHole a = ReturnStructWithHole();
+  break_optimization(&a);
+}
+
+template <class T>
+NOINLINE T ReturnStruct() {
+  T res;
+  __msan_poison(&res, sizeof(res));
+  res.a = 1;
+  return res;
+}
+
+template <class T>
+NOINLINE void TestReturnStruct() {
+  T s1 = ReturnStruct<T>();
+  EXPECT_NOT_POISONED(s1.a);
+  EXPECT_POISONED(s1.b);
+}
+
+struct SSS1 {
+  int a, b, c;
+};
+struct SSS2 {
+  int b, a, c;
+};
+struct SSS3 {
+  int b, c, a;
+};
+struct SSS4 {
+  int c, b, a;
+};
+
+struct SSS5 {
+  int a;
+  float b;
+};
+struct SSS6 {
+  int a;
+  double b;
+};
+struct SSS7 {
+  S8 b;
+  int a;
+};
+struct SSS8 {
+  S2 b;
+  S8 a;
+};
+
+TEST(MemorySanitizer, IntStruct3) {
+  TestReturnStruct<SSS1>();
+  TestReturnStruct<SSS2>();
+  TestReturnStruct<SSS3>();
+  TestReturnStruct<SSS4>();
+  TestReturnStruct<SSS5>();
+  TestReturnStruct<SSS6>();
+  TestReturnStruct<SSS7>();
+  TestReturnStruct<SSS8>();
+}
+
+struct LongStruct {
+  U1 a1, b1;
+  U2 a2, b2;
+  U4 a4, b4;
+  U8 a8, b8;
+};
+
+NOINLINE LongStruct ReturnLongStruct1() {
+  LongStruct res;
+  __msan_poison(&res, sizeof(res));
+  res.a1 = res.a2 = res.a4 = res.a8 = 111;
+  // leaves b1, .., b8 poisoned.
+  return res;
+}
+
+NOINLINE LongStruct ReturnLongStruct2() {
+  LongStruct res;
+  __msan_poison(&res, sizeof(res));
+  res.b1 = res.b2 = res.b4 = res.b8 = 111;
+  // leaves a1, .., a8 poisoned.
+  return res;
+}
+
+TEST(MemorySanitizer, LongStruct) {
+  LongStruct s1 = ReturnLongStruct1();
+  __msan_print_shadow(&s1, sizeof(s1));
+  EXPECT_NOT_POISONED(s1.a1);
+  EXPECT_NOT_POISONED(s1.a2);
+  EXPECT_NOT_POISONED(s1.a4);
+  EXPECT_NOT_POISONED(s1.a8);
+
+  EXPECT_POISONED(s1.b1);
+  EXPECT_POISONED(s1.b2);
+  EXPECT_POISONED(s1.b4);
+  EXPECT_POISONED(s1.b8);
+
+  LongStruct s2 = ReturnLongStruct2();
+  __msan_print_shadow(&s2, sizeof(s2));
+  EXPECT_NOT_POISONED(s2.b1);
+  EXPECT_NOT_POISONED(s2.b2);
+  EXPECT_NOT_POISONED(s2.b4);
+  EXPECT_NOT_POISONED(s2.b8);
+
+  EXPECT_POISONED(s2.a1);
+  EXPECT_POISONED(s2.a2);
+  EXPECT_POISONED(s2.a4);
+  EXPECT_POISONED(s2.a8);
+}
+
+TEST(MemorySanitizer, getrlimit) {
+  struct rlimit limit;
+  __msan_poison(&limit, sizeof(limit));
+  int result = getrlimit(RLIMIT_DATA, &limit);
+  assert(result == 0);
+  // Both fields filled in by getrlimit() must come back unpoisoned.
+  EXPECT_NOT_POISONED(limit.rlim_cur);
+  EXPECT_NOT_POISONED(limit.rlim_max);
+}
+
+TEST(MemorySanitizer, getrusage) {
+  struct rusage usage;
+  __msan_poison(&usage, sizeof(usage));
+  int result = getrusage(RUSAGE_SELF, &usage);
+  assert(result == 0);
+  // Every field read below must have been unpoisoned by getrusage().
+  EXPECT_NOT_POISONED(usage.ru_utime.tv_sec);
+  EXPECT_NOT_POISONED(usage.ru_utime.tv_usec);
+  EXPECT_NOT_POISONED(usage.ru_stime.tv_sec);
+  EXPECT_NOT_POISONED(usage.ru_stime.tv_usec);
+  EXPECT_NOT_POISONED(usage.ru_maxrss);
+  EXPECT_NOT_POISONED(usage.ru_minflt);
+  EXPECT_NOT_POISONED(usage.ru_majflt);
+  EXPECT_NOT_POISONED(usage.ru_inblock);
+  EXPECT_NOT_POISONED(usage.ru_oublock);
+  EXPECT_NOT_POISONED(usage.ru_nvcsw);
+  EXPECT_NOT_POISONED(usage.ru_nivcsw);
+}
+
+static void dladdr_testfn() {}
+
+TEST(MemorySanitizer, dladdr) {
+  Dl_info info;
+  __msan_poison(&info, sizeof(info));  // dladdr() must undo this for all fields.
+  int result = dladdr((const void*)dladdr_testfn, &info);
+  assert(result != 0);  // dladdr() returns nonzero on success.
+  EXPECT_NOT_POISONED((unsigned long)info.dli_fname);
+  if (info.dli_fname)
+    EXPECT_NOT_POISONED(strlen(info.dli_fname));  // The string bytes as well.
+  EXPECT_NOT_POISONED((unsigned long)info.dli_fbase);
+  EXPECT_NOT_POISONED((unsigned long)info.dli_sname);
+  if (info.dli_sname)
+    EXPECT_NOT_POISONED(strlen(info.dli_sname));  // The string bytes as well.
+  EXPECT_NOT_POISONED((unsigned long)info.dli_saddr);
+}
+
+#ifdef __GLIBC__
+extern "C" {
+  extern void *__libc_stack_end;
+}
+
+static char **GetArgv(void) {
+  uintptr_t *stack_end = (uintptr_t *)__libc_stack_end;
+  return (char**)(stack_end + 1);  // Presumably skips argc - TODO confirm.
+}
+
+#else  // __GLIBC__
+# error "TODO: port this"
+#endif
+
+TEST(MemorySanitizer, dlopen) {
+  // Compute the path to our loadable DSO.  We assume it's in the same
+  // directory.  Only use string routines that we intercept so far to do this.
+  char **argv = GetArgv();
+  const char *basename = "libmsan_loadable.x86_64.so";
+  size_t path_max = strlen(argv[0]) + 1 + strlen(basename) + 1;
+  char *path = new char[path_max];
+  char *last_slash = strrchr(argv[0], '/');
+  assert(last_slash);  // argv[0] must contain a directory component.
+  snprintf(path, path_max, "%.*s/%s", int(last_slash - argv[0]),
+           argv[0], basename);  // "<directory of argv[0]>/<basename>"
+
+  // We need to clear shadow for globals when doing dlopen.  In order to test
+  // this, we have to poison the shadow for the DSO before we load it.  In
+  // general this is difficult, but the loader tends to reload things in the
+  // same place, so we open, close, and then reopen.  The global should always
+  // start out clean after dlopen.
+  for (int i = 0; i < 2; i++) {  // Pass 0 poisons the global; pass 1 re-checks.
+    void *lib = dlopen(path, RTLD_LAZY);
+    if (lib == NULL) {
+      printf("dlerror: %s\n", dlerror());  // Print the reason before aborting.
+      assert(lib != NULL);
+    }
+    void **(*get_dso_global)() = (void **(*)())dlsym(lib, "get_dso_global");
+    assert(get_dso_global);
+    void **dso_global = get_dso_global();
+    EXPECT_NOT_POISONED(*dso_global);
+    __msan_poison(dso_global, sizeof(*dso_global));
+    EXPECT_POISONED(*dso_global);
+    dlclose(lib);
+  }
+
+  delete[] path;
+}
+
+TEST(MemorySanitizer, scanf) {
+  const char *input = "42 hello";
+  int* d = new int;
+  char* s = new char[7];  // One byte more than "%5s" can write (5 chars + NUL).
+  int res = sscanf(input, "%d %5s", d, s);
+  printf("res %d\n", res);
+  assert(res == 2);
+  EXPECT_NOT_POISONED(*d);
+  EXPECT_NOT_POISONED(s[0]);
+  EXPECT_NOT_POISONED(s[1]);
+  EXPECT_NOT_POISONED(s[2]);
+  EXPECT_NOT_POISONED(s[3]);
+  EXPECT_NOT_POISONED(s[4]);
+  EXPECT_NOT_POISONED(s[5]);  // The terminating NUL stored by sscanf().
+  EXPECT_POISONED(s[6]);      // Never written, so still uninitialized.
+  delete[] s;  // Allocated with new[]; scalar delete here would be UB.
+  delete d;
+}
+
+static void* SimpleThread_threadfn(void* data) {
+  return new int;  // data is unused; the joining thread deletes this int.
+}
+
+TEST(MemorySanitizer, SimpleThread) {
+  pthread_t t;
+  void* p;
+  int res = pthread_create(&t, NULL, SimpleThread_threadfn, NULL);
+  assert(!res);
+  res = pthread_join(t, &p);  // p receives the thread function's return value.
+  assert(!res);
+  if (!__msan_has_dynamic_component())  // FIXME: intercept pthread_join (?).
+    __msan_unpoison(&p, sizeof(p));
+  delete (int*)p;  // Frees the int allocated in SimpleThread_threadfn().
+}
+
+TEST(MemorySanitizer, uname) {
+  struct utsname u;
+  int res = uname(&u);
+  assert(!res);
+  EXPECT_NOT_POISONED(strlen(u.sysname));
+  EXPECT_NOT_POISONED(strlen(u.nodename));
+  EXPECT_NOT_POISONED(strlen(u.release));
+  EXPECT_NOT_POISONED(strlen(u.version));
+  EXPECT_NOT_POISONED(strlen(u.machine));
+}
+
+TEST(MemorySanitizer, gethostname) {
+  char hostname[100];
+  int res = gethostname(hostname, sizeof(hostname));
+  assert(!res);
+  EXPECT_NOT_POISONED(strlen(hostname));  // The name must be initialized.
+}
+
+template<class T>
+static bool applySlt(T value, T shadow) {
+  __msan_partial_poison(&value, &shadow, sizeof(T));
+  volatile bool zzz = true;
+  // This "|| zzz" trick somehow makes LLVM emit "icmp slt" instead of
+  // a shift-and-trunc to get at the highest bit.
+  volatile bool v = value < 0 || zzz;
+  return v;
+}
+
+TEST(MemorySanitizer, SignedCompareWithZero) {
+  EXPECT_NOT_POISONED(applySlt<S4>(0xF, 0xF));
+  EXPECT_NOT_POISONED(applySlt<S4>(0xF, 0xFF));
+  EXPECT_NOT_POISONED(applySlt<S4>(0xF, 0xFFFFFF));
+  EXPECT_NOT_POISONED(applySlt<S4>(0xF, 0x7FFFFFF));
+  EXPECT_UMR(applySlt<S4>(0xF, 0x80FFFFFF));
+  EXPECT_UMR(applySlt<S4>(0xF, 0xFFFFFFFF));
+}
+
+template <class T, class S>
+static T poisoned(T Va, S Sa) {
+  char SIZE_CHECK1[(ssize_t)sizeof(T) - (ssize_t)sizeof(S)];
+  char SIZE_CHECK2[(ssize_t)sizeof(S) - (ssize_t)sizeof(T)];
+  T a;  // SIZE_CHECK1/2: one has negative size unless sizeof(T) == sizeof(S).
+  a = Va;
+  __msan_partial_poison(&a, &Sa, sizeof(T));  // NOTE(review): Sa = shadow mask?
+  return a;
+}
+
+TEST(MemorySanitizer, ICmpRelational) {
+  EXPECT_NOT_POISONED(poisoned(0, 0) < poisoned(0, 0));
+  EXPECT_NOT_POISONED(poisoned(0U, 0) < poisoned(0U, 0));
+  EXPECT_NOT_POISONED(poisoned(0LL, 0LLU) < poisoned(0LL, 0LLU));
+  EXPECT_NOT_POISONED(poisoned(0LLU, 0LLU) < poisoned(0LLU, 0LLU));
+  EXPECT_POISONED(poisoned(0xFF, 0xFF) < poisoned(0xFF, 0xFF));
+  EXPECT_POISONED(poisoned(0xFFFFFFFFU, 0xFFFFFFFFU) <
+                  poisoned(0xFFFFFFFFU, 0xFFFFFFFFU));
+  EXPECT_POISONED(poisoned(-1, 0xFFFFFFFFU) <
+                  poisoned(-1, 0xFFFFFFFFU));
+
+  EXPECT_NOT_POISONED(poisoned(0, 0) <= poisoned(0, 0));
+  EXPECT_NOT_POISONED(poisoned(0U, 0) <= poisoned(0U, 0));
+  EXPECT_NOT_POISONED(poisoned(0LL, 0LLU) <= poisoned(0LL, 0LLU));
+  EXPECT_NOT_POISONED(poisoned(0LLU, 0LLU) <= poisoned(0LLU, 0LLU));
+  EXPECT_POISONED(poisoned(0xFF, 0xFF) <= poisoned(0xFF, 0xFF));
+  EXPECT_POISONED(poisoned(0xFFFFFFFFU, 0xFFFFFFFFU) <=
+                  poisoned(0xFFFFFFFFU, 0xFFFFFFFFU));
+  EXPECT_POISONED(poisoned(-1, 0xFFFFFFFFU) <=
+                  poisoned(-1, 0xFFFFFFFFU));
+
+  EXPECT_NOT_POISONED(poisoned(0, 0) > poisoned(0, 0));
+  EXPECT_NOT_POISONED(poisoned(0U, 0) > poisoned(0U, 0));
+  EXPECT_NOT_POISONED(poisoned(0LL, 0LLU) > poisoned(0LL, 0LLU));
+  EXPECT_NOT_POISONED(poisoned(0LLU, 0LLU) > poisoned(0LLU, 0LLU));
+  EXPECT_POISONED(poisoned(0xFF, 0xFF) > poisoned(0xFF, 0xFF));
+  EXPECT_POISONED(poisoned(0xFFFFFFFFU, 0xFFFFFFFFU) >
+                  poisoned(0xFFFFFFFFU, 0xFFFFFFFFU));
+  EXPECT_POISONED(poisoned(-1, 0xFFFFFFFFU) >
+                  poisoned(-1, 0xFFFFFFFFU));
+
+  EXPECT_NOT_POISONED(poisoned(0, 0) >= poisoned(0, 0));
+  EXPECT_NOT_POISONED(poisoned(0U, 0) >= poisoned(0U, 0));
+  EXPECT_NOT_POISONED(poisoned(0LL, 0LLU) >= poisoned(0LL, 0LLU));
+  EXPECT_NOT_POISONED(poisoned(0LLU, 0LLU) >= poisoned(0LLU, 0LLU));
+  EXPECT_POISONED(poisoned(0xFF, 0xFF) >= poisoned(0xFF, 0xFF));
+  EXPECT_POISONED(poisoned(0xFFFFFFFFU, 0xFFFFFFFFU) >=
+                  poisoned(0xFFFFFFFFU, 0xFFFFFFFFU));
+  EXPECT_POISONED(poisoned(-1, 0xFFFFFFFFU) >=
+                  poisoned(-1, 0xFFFFFFFFU));
+
+  EXPECT_POISONED(poisoned(6, 0xF) > poisoned(7, 0));
+  EXPECT_POISONED(poisoned(0xF, 0xF) > poisoned(7, 0));
+
+  EXPECT_NOT_POISONED(poisoned(-1, 0x80000000U) >= poisoned(-1, 0U));
+}
+
+#if MSAN_HAS_M128
+TEST(MemorySanitizer, ICmpVectorRelational) {
+  EXPECT_NOT_POISONED(poisoned(_mm_set1_epi16(0), _mm_set1_epi16(0)) <
+                      poisoned(_mm_set1_epi16(0), _mm_set1_epi16(0)));
+  EXPECT_NOT_POISONED(poisoned(_mm_set1_epi32(0), _mm_set1_epi32(0)) <
+                      poisoned(_mm_set1_epi32(0), _mm_set1_epi32(0)));
+  EXPECT_POISONED(poisoned(_mm_set1_epi16(0), _mm_set1_epi16(0xFFFF)) <
+                  poisoned(_mm_set1_epi16(0), _mm_set1_epi16(0xFFFF)));
+  EXPECT_POISONED(poisoned(_mm_set1_epi16(6), _mm_set1_epi16(0xF)) >
+                  poisoned(_mm_set1_epi16(7), _mm_set1_epi16(0)));
+}
+#endif
+
+// Volatile bitfield store is implemented as load-mask-store
+// Test that we don't warn on the store of (uninitialized) padding.
+struct VolatileBitfieldStruct {
+  volatile unsigned x : 1;
+  unsigned y : 1;
+};
+
+TEST(MemorySanitizer, VolatileBitfield) {
+  VolatileBitfieldStruct *S = new VolatileBitfieldStruct;
+  S->x = 1;
+  EXPECT_NOT_POISONED((unsigned)S->x);
+  EXPECT_POISONED((unsigned)S->y);
+}
+
+TEST(MemorySanitizerDr, StoreInDSOTest) {
+  if (!__msan_has_dynamic_component()) return;
+  char* s = new char[10];
+  dso_memfill(s, 9);
+  EXPECT_NOT_POISONED(s[5]);
+  EXPECT_POISONED(s[9]);
+}
+
+int return_poisoned_int() {
+  return ReturnPoisoned<U8>();  // The result is implicitly converted to int.
+}
+
+TEST(MemorySanitizerDr, ReturnFromDSOTest) {
+  if (!__msan_has_dynamic_component()) return;
+  EXPECT_NOT_POISONED(dso_callfn(return_poisoned_int));
+}
+
+NOINLINE int TrashParamTLS(long long x, long long y, long long z) {  //NOLINT
+  EXPECT_POISONED(x);
+  EXPECT_POISONED(y);
+  EXPECT_POISONED(z);
+  return 0;
+}
+
+static int CheckParamTLS(long long x, long long y, long long z) {  //NOLINT
+  EXPECT_NOT_POISONED(x);
+  EXPECT_NOT_POISONED(y);
+  EXPECT_NOT_POISONED(z);
+  return 0;
+}
+
+TEST(MemorySanitizerDr, CallFromDSOTest) {
+  if (!__msan_has_dynamic_component()) return;
+  S8* x = GetPoisoned<S8>();
+  S8* y = GetPoisoned<S8>();
+  S8* z = GetPoisoned<S8>();
+  EXPECT_NOT_POISONED(TrashParamTLS(*x, *y, *z));
+  EXPECT_NOT_POISONED(dso_callfn1(CheckParamTLS));
+}
+
+static void StackStoreInDSOFn(int* x, int* y) {
+  EXPECT_NOT_POISONED(*x);
+  EXPECT_NOT_POISONED(*y);
+}
+
+TEST(MemorySanitizerDr, StackStoreInDSOTest) {
+  if (!__msan_has_dynamic_component()) return;
+  dso_stack_store(StackStoreInDSOFn, 1);
+}
+
+TEST(MemorySanitizerOrigins, SetGet) {
+  EXPECT_EQ(TrackingOrigins(), __msan_get_track_origins());
+  if (!TrackingOrigins()) return;
+  int x;
+  __msan_set_origin(&x, sizeof(x), 1234);
+  EXPECT_EQ(1234, __msan_get_origin(&x));
+  __msan_set_origin(&x, sizeof(x), 5678);
+  EXPECT_EQ(5678, __msan_get_origin(&x));
+  __msan_set_origin(&x, sizeof(x), 0);
+  EXPECT_EQ(0, __msan_get_origin(&x));
+}
+
+namespace {
+struct S {
+  U4 dummy;
+  U2 a;
+  U2 b;
+};
+
+// http://code.google.com/p/memory-sanitizer/issues/detail?id=6
+TEST(MemorySanitizerOrigins, DISABLED_InitializedStoreDoesNotChangeOrigin) {
+  if (!TrackingOrigins()) return;
+
+  S s;
+  U4 origin = rand();  // NOLINT
+  s.a = *GetPoisonedO<U2>(0, origin);
+  EXPECT_EQ(origin, __msan_get_origin(&s.a));
+  EXPECT_EQ(origin, __msan_get_origin(&s.b));
+
+  s.b = 42;
+  EXPECT_EQ(origin, __msan_get_origin(&s.a));
+  EXPECT_EQ(origin, __msan_get_origin(&s.b));
+}
+}  // namespace
+
+template<class T, class BinaryOp>
+INLINE
+void BinaryOpOriginTest(BinaryOp op) {
+  U4 ox = rand();  //NOLINT
+  U4 oy = rand();  //NOLINT
+  T *x = GetPoisonedO<T>(0, ox, 0);
+  T *y = GetPoisonedO<T>(1, oy, 0);
+  T *z = GetPoisonedO<T>(2, 0, 0);
+
+  *z = op(*x, *y);
+  U4 origin = __msan_get_origin(z);
+  EXPECT_POISONED_O(*z, origin);
+  EXPECT_EQ(true, origin == ox || origin == oy);
+
+  // y is poisoned, x is not.
+  *x = 10101;
+  *y = *GetPoisonedO<T>(1, oy);
+  break_optimization(x);
+  __msan_set_origin(z, sizeof(*z), 0);
+  *z = op(*x, *y);
+  EXPECT_POISONED_O(*z, oy);
+  EXPECT_EQ(__msan_get_origin(z), oy);
+
+  // x is poisoned, y is not.
+  *x = *GetPoisonedO<T>(0, ox);
+  *y = 10101010;
+  break_optimization(y);
+  __msan_set_origin(z, sizeof(*z), 0);
+  *z = op(*x, *y);
+  EXPECT_POISONED_O(*z, ox);
+  EXPECT_EQ(__msan_get_origin(z), ox);
+}
+
+template<class T> INLINE T XOR(const T &a, const T&b) { return a ^ b; }
+template<class T> INLINE T ADD(const T &a, const T&b) { return a + b; }
+template<class T> INLINE T SUB(const T &a, const T&b) { return a - b; }
+template<class T> INLINE T MUL(const T &a, const T&b) { return a * b; }
+template<class T> INLINE T AND(const T &a, const T&b) { return a & b; }
+template<class T> INLINE T OR (const T &a, const T&b) { return a | b; }
+
+TEST(MemorySanitizerOrigins, BinaryOp) {
+  if (!TrackingOrigins()) return;
+  BinaryOpOriginTest<S8>(XOR<S8>);
+  BinaryOpOriginTest<U8>(ADD<U8>);
+  BinaryOpOriginTest<S4>(SUB<S4>);
+  BinaryOpOriginTest<S4>(MUL<S4>);
+  BinaryOpOriginTest<U4>(OR<U4>);
+  BinaryOpOriginTest<U4>(AND<U4>);
+  BinaryOpOriginTest<double>(ADD<U4>);
+  BinaryOpOriginTest<float>(ADD<S4>);
+  BinaryOpOriginTest<double>(ADD<double>);
+  BinaryOpOriginTest<float>(ADD<double>);
+}
+
+TEST(MemorySanitizerOrigins, Unary) {
+  if (!TrackingOrigins()) return;
+  EXPECT_POISONED_O(*GetPoisonedO<S8>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S8>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S8>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S8>(0, __LINE__), __LINE__);
+
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__), __LINE__);
+
+  EXPECT_POISONED_O(*GetPoisonedO<U4>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<U4>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<U4>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<U4>(0, __LINE__), __LINE__);
+
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__), __LINE__);
+
+  EXPECT_POISONED_O((void*)*GetPoisonedO<S8>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O((U8)*GetPoisonedO<void*>(0, __LINE__), __LINE__);
+}
+
+TEST(MemorySanitizerOrigins, EQ) {
+  if (!TrackingOrigins()) return;
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__) <= 11, __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__) == 11, __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<float>(0, __LINE__) == 1.1, __LINE__);
+}
+
+TEST(MemorySanitizerOrigins, DIV) {
+  if (!TrackingOrigins()) return;
+  EXPECT_POISONED_O(*GetPoisonedO<U8>(0, __LINE__) / 100, __LINE__);
+  unsigned o = __LINE__;
+  EXPECT_UMR_O(volatile unsigned y = 100 / *GetPoisonedO<S4>(0, o, 1), o);
+}
+
+TEST(MemorySanitizerOrigins, SHIFT) {
+  if (!TrackingOrigins()) return;
+  EXPECT_POISONED_O(*GetPoisonedO<U8>(0, __LINE__) >> 10, __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S8>(0, __LINE__) >> 10, __LINE__);
+  EXPECT_POISONED_O(*GetPoisonedO<S8>(0, __LINE__) << 10, __LINE__);
+  EXPECT_POISONED_O(10U << *GetPoisonedO<U8>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(-10 >> *GetPoisonedO<S8>(0, __LINE__), __LINE__);
+  EXPECT_POISONED_O(-10 << *GetPoisonedO<S8>(0, __LINE__), __LINE__);
+}
+
+template<class T, int N>
+void MemCpyTest() {
+  int ox = __LINE__;  // Origin id attached to the poisoned source buffer x.
+  T *x = new T[N];
+  T *y = new T[N];
+  T *z = new T[N];
+  __msan_poison(x, N * sizeof(T));
+  __msan_set_origin(x, N * sizeof(T), ox);
+  __msan_set_origin(y, N * sizeof(T), 777777);  // Distinct origins that the
+  __msan_set_origin(z, N * sizeof(T), 888888);  // copies below must replace.
+  EXPECT_NOT_POISONED(x);  // Checks the pointer x itself, not the buffer.
+  memcpy(y, x, N * sizeof(T));
+  EXPECT_POISONED_O(y[0], ox);  // First, middle and last elements are sampled.
+  EXPECT_POISONED_O(y[N/2], ox);
+  EXPECT_POISONED_O(y[N-1], ox);
+  EXPECT_NOT_POISONED(x);  // Again the pointer value only.
+  memmove(z, x, N * sizeof(T));
+  EXPECT_POISONED_O(z[0], ox);
+  EXPECT_POISONED_O(z[N/2], ox);
+  EXPECT_POISONED_O(z[N-1], ox);
+}  // NOTE(review): x, y and z are never freed.
+
+TEST(MemorySanitizerOrigins, LargeMemCpy) {
+  if (!TrackingOrigins()) return;
+  MemCpyTest<U1, 10000>();
+  MemCpyTest<U8, 10000>();
+}
+
+TEST(MemorySanitizerOrigins, SmallMemCpy) {
+  if (!TrackingOrigins()) return;
+  MemCpyTest<U8, 1>();
+  MemCpyTest<U8, 2>();
+  MemCpyTest<U8, 3>();
+}
+
+TEST(MemorySanitizerOrigins, Select) {
+  if (!TrackingOrigins()) return;
+  EXPECT_NOT_POISONED(g_one ? 1 : *GetPoisonedO<S4>(0, __LINE__));
+  EXPECT_POISONED_O(*GetPoisonedO<S4>(0, __LINE__), __LINE__);
+  S4 x;
+  break_optimization(&x);
+  x = g_1 ? *GetPoisonedO<S4>(0, __LINE__) : 0;
+
+  EXPECT_POISONED_O(g_1 ? *GetPoisonedO<S4>(0, __LINE__) : 1, __LINE__);
+  EXPECT_POISONED_O(g_0 ? 1 : *GetPoisonedO<S4>(0, __LINE__), __LINE__);
+}
+
+extern "C"
+NOINLINE char AllocaTO() {
+  int ar[100];  // Never initialized.
+  break_optimization(ar);
+  return ar[10];  // Returns an uninitialized element, converted to char.
+  // fprintf(stderr, "Descr: %s\n",
+  //        __msan_get_origin_descr_if_stack(__msan_get_origin_tls()));
+}
+
+TEST(MemorySanitizerOrigins, Alloca) {
+  if (!TrackingOrigins()) return;
+  EXPECT_POISONED_S(AllocaTO(), "ar@AllocaTO");
+  EXPECT_POISONED_S(AllocaTO(), "ar@AllocaTO");
+  EXPECT_POISONED_S(AllocaTO(), "ar@AllocaTO");
+  EXPECT_POISONED_S(AllocaTO(), "ar@AllocaTO");
+}
+
+// FIXME: replace with a lit-like test.
+TEST(MemorySanitizerOrigins, DISABLED_AllocaDeath) {
+  if (!TrackingOrigins()) return;
+  EXPECT_DEATH(AllocaTO(), "ORIGIN: stack allocation: ar@AllocaTO");
+}
+
+NOINLINE int RetvalOriginTest(U4 origin) {
+  int *a = new int;  // Never assigned a value.
+  break_optimization(a);
+  __msan_set_origin(a, sizeof(*a), origin);  // Tag *a with the caller's origin.
+  int res = *a;  // Read the unassigned value before the memory is freed.
+  delete a;
+  return res;
+}
+
+TEST(MemorySanitizerOrigins, Retval) {
+  if (!TrackingOrigins()) return;
+  EXPECT_POISONED_O(RetvalOriginTest(__LINE__), __LINE__);
+}
+
+NOINLINE void ParamOriginTest(int param, U4 origin) {
+  EXPECT_POISONED_O(param, origin);
+}
+
+TEST(MemorySanitizerOrigins, Param) {
+  if (!TrackingOrigins()) return;
+  int *a = new int;
+  U4 origin = __LINE__;
+  break_optimization(a);
+  __msan_set_origin(a, sizeof(*a), origin);
+  ParamOriginTest(*a, origin);
+  delete a;
+}
+
+TEST(MemorySanitizerOrigins, Invoke) {
+  if (!TrackingOrigins()) return;
+  StructWithDtor s;  // Will cause the calls to become invokes.
+  EXPECT_POISONED_O(RetvalOriginTest(__LINE__), __LINE__);
+}
+
+TEST(MemorySanitizerOrigins, strlen) {
+  S8 alignment;
+  break_optimization(&alignment);
+  char x[4] = {'a', 'b', 0, 0};
+  __msan_poison(&x[2], 1);
+  U4 origin = __LINE__;
+  __msan_set_origin(x, sizeof(x), origin);
+  EXPECT_UMR_O(volatile unsigned y = strlen(x), origin);
+}
+
+TEST(MemorySanitizerOrigins, wcslen) {
+  wchar_t w[3] = {'a', 'b', 0};
+  U4 origin = __LINE__;
+  __msan_set_origin(w, sizeof(w), origin);
+  __msan_poison(&w[2], sizeof(wchar_t));
+  EXPECT_UMR_O(volatile unsigned y = wcslen(w), origin);
+}
+
+#if MSAN_HAS_M128
+TEST(MemorySanitizerOrigins, StoreIntrinsic) {
+  __m128 x, y;
+  U4 origin = __LINE__;
+  __msan_set_origin(&x, sizeof(x), origin);
+  __msan_poison(&x, sizeof(x));
+  __builtin_ia32_storeups((float*)&y, x);
+  EXPECT_POISONED_O(y, origin);
+}
+#endif
+
+NOINLINE void RecursiveMalloc(int depth) {
+  static int count;  // Number of calls so far, shared by every invocation.
+  count++;
+  if ((count % (1024 * 1024)) == 0)  // Progress line every 2^20 calls.
+    printf("RecursiveMalloc: %d\n", count);
+  int *x1 = new int;
+  int *x2 = new int;
+  break_optimization(x1);
+  break_optimization(x2);
+  if (depth > 0) {  // Two recursive calls per level: a binary call tree.
+    RecursiveMalloc(depth-1);
+    RecursiveMalloc(depth-1);
+  }
+  delete x1;  // Freed only after the whole subtree below has returned.
+  delete x2;
+}
+
+TEST(MemorySanitizer, CallocOverflow) {
+  size_t kArraySize = 4096;
+  volatile size_t kMaxSizeT = std::numeric_limits<size_t>::max();
+  volatile size_t kArraySize2 = kMaxSizeT / kArraySize + 10;
+  void *p = calloc(kArraySize, kArraySize2);  // Should return 0.
+  EXPECT_EQ(0L, Ident(p));
+}
+
+TEST(MemorySanitizerStress, DISABLED_MallocStackTrace) {
+  RecursiveMalloc(22);
+}
+
+int main(int argc, char **argv) {
+  // Hand the command line to gtest, then run every registered test; the
+  // process exit status is whatever RUN_ALL_TESTS() reports.
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/lib/msan/tests/msandr_test_so.cc b/lib/msan/tests/msandr_test_so.cc
new file mode 100644
index 0000000..eb605d4
--- /dev/null
+++ b/lib/msan/tests/msandr_test_so.cc
@@ -0,0 +1,38 @@
+//===-- msandr_test_so.cc  ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer unit tests.
+//===----------------------------------------------------------------------===//
+
+#include "msandr_test_so.h"
+
+void dso_memfill(char* s, unsigned n) {
+  // Store the values 0, 1, ..., n-1 (each converted to char) into s[0..n).
+  for (unsigned idx = 0; idx != n; ++idx) s[idx] = idx;
+}
+
+int dso_callfn(int (*fn)(void)) {
+  volatile int x = fn();  // volatile: the store to x and its reload must occur.
+  return x;
+}
+
+int dso_callfn1(int (*fn)(long long, long long, long long)) {  //NOLINT
+  volatile int x = fn(1, 2, 3);  // All three arguments are plain constants.
+  return x;
+}
+
+int dso_stack_store(void (*fn)(int*, int*), int x) {
+  int y = x + 1;
+  fn(&x, &y);  // The callback receives the addresses of this frame's x and y.
+  return y;
+}
+
+void break_optimization(void *x) {}  // Empty body; x is unused.
diff --git a/lib/msan/tests/msandr_test_so.h b/lib/msan/tests/msandr_test_so.h
new file mode 100644
index 0000000..cd75ff3
--- /dev/null
+++ b/lib/msan/tests/msandr_test_so.h
@@ -0,0 +1,24 @@
+//===-- msandr_test_so.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer unit tests.
+//===----------------------------------------------------------------------===//
+
+#ifndef MSANDR_MSANDR_TEST_SO_H
+#define MSANDR_MSANDR_TEST_SO_H
+
+void dso_memfill(char* s, unsigned n);
+int dso_callfn(int (*fn)(void));
+int dso_callfn1(int (*fn)(long long, long long, long long));  //NOLINT
+int dso_stack_store(void (*fn)(int*, int*), int x);
+void break_optimization(void *x);
+
+#endif
diff --git a/lib/msandr/CMakeLists.txt b/lib/msandr/CMakeLists.txt
new file mode 100644
index 0000000..5a96a9d
--- /dev/null
+++ b/lib/msandr/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Build the MSanDR DynamoRIO client only when both package dirs are provided.
+if(DynamoRIO_DIR AND DrMemoryFramework_DIR)
+  set(CMAKE_COMPILER_IS_GNUCC 1)  # NOTE(review): presumably for DynamoRIO's CMake.
+  find_package(DynamoRIO REQUIRED)
+  find_package(DrMemoryFramework REQUIRED)
+  # Only x86_64 is built; the target name (and so the library name) embeds it.
+  set(arch "x86_64")
+  add_library(clang_rt.msandr-${arch} SHARED msandr.cc)
+  configure_DynamoRIO_client(clang_rt.msandr-${arch})
+  # Appends the string `cflags` to the COMPILE_FLAGS property of target `tgt`.
+  function(append_target_cflags tgt cflags)
+    get_property(old_cflags TARGET ${tgt} PROPERTY COMPILE_FLAGS)
+    set_property(TARGET ${tgt} PROPERTY COMPILE_FLAGS "${old_cflags} ${cflags}")
+  endfunction()
+
+  append_target_cflags(clang_rt.msandr-${arch} "-Wno-c++11-extensions")
+
+  use_DynamoRIO_extension(clang_rt.msandr-${arch} drutil)
+  use_DynamoRIO_extension(clang_rt.msandr-${arch} drmgr)
+  use_DynamoRIO_extension(clang_rt.msandr-${arch} drsyscall)
+
+  set_target_properties(clang_rt.msandr-${arch} PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
+  install(TARGETS clang_rt.msandr-${arch}
+    LIBRARY DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
+endif()
diff --git a/lib/msandr/README.txt b/lib/msandr/README.txt
new file mode 100644
index 0000000..b328910
--- /dev/null
+++ b/lib/msandr/README.txt
@@ -0,0 +1,33 @@
+Experimental DynamoRIO-MSAN plugin (codename "MSanDR").
+Supports Linux/x86_64 only.
+
+Building:
+  1. First, download and build DynamoRIO:
+     (svn co https://dynamorio.googlecode.com/svn/trunk dr && \
+      cd dr && mkdir build && cd build && \
+      cmake -DDR_EXT_DRMGR_STATIC=ON -DDR_EXT_DRSYMS_STATIC=ON \
+            -DDR_EXT_DRUTIL_STATIC=ON -DDR_EXT_DRWRAP_STATIC=ON .. && \
+      make -j10 && make install)
+
+  2. Then download and build DrMemory (needed for the DrSyscall extension):
+     (svn co http://drmemory.googlecode.com/svn/trunk/ drmemory && \
+      cd drmemory && mkdir build && cd build && \
+      cmake -DDynamoRIO_DIR=`pwd`/../../dr/exports/cmake .. && \
+      make -j10 && make install)
+
+  NOTE: The line above will build a shared DrSyscall library in a non-standard
+  location. This will require the use of LD_LIBRARY_PATH when running MSanDR.
+  To build a static DrSyscall library (and link it into MSanDR), add
+  -DDR_EXT_DRSYSCALL_STATIC=ON to the CMake invocation above, but
+  beware: DrSyscall is LGPL.
+
+  3. Now, build LLVM with two extra CMake flags:
+       -DDynamoRIO_DIR=<path_to_dynamorio>/exports/cmake
+       -DDrMemoryFramework_DIR=<path_to_drmemory>/exports64/drmf
+
+  This will build lib/clang/$VERSION/lib/linux/libclang_rt.msandr-x86_64.so
+
+Running:
+  <path_to_dynamorio>/exports/bin64/drrun -c lib/clang/$VERSION/lib/linux/libclang_rt.msandr-x86_64.so -- test_binary
+
+MSan unit tests contain several tests for MSanDR (use MemorySanitizerDr.* gtest filter).
diff --git a/lib/msandr/msandr.cc b/lib/msandr/msandr.cc
new file mode 100644
index 0000000..fee9834
--- /dev/null
+++ b/lib/msandr/msandr.cc
@@ -0,0 +1,723 @@
+//===-- msandr.cc ---------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// DynamoRIO client for MemorySanitizer.
+//
+// MemorySanitizer requires that all program code is instrumented. Any memory
+// store that can turn an uninitialized value into an initialized value must be
+// observed by the tool, otherwise we risk reporting a false UMR.
+//
+// This also includes any libraries that the program depends on.
+//
+// In the case when rebuilding all program dependencies with MemorySanitizer is
+// problematic, an experimental MSanDR tool (the code you are currently looking
+// at) can be used. It is a DynamoRIO-based tool that uses dynamic
+// instrumentation to
+// * Unpoison all memory stores.
+// * Unpoison TLS slots used by MemorySanitizer to pass function arguments and
+//   return value shadow on anything that looks like a function call or a return
+//   from a function.
+//
+// This tool does not detect the use of uninitialized values in uninstrumented
+// libraries. It merely gets rid of false positives by marking all data that
+// passes through uninstrumented code as fully initialized.
+//===----------------------------------------------------------------------===//
+
+#include <dr_api.h>
+#include <drutil.h>
+#include <drmgr.h>
+#include <drsyscall.h>
+
+#include <sys/mman.h>
+#include <sys/syscall.h>  /* for SYS_mmap */
+
+#include <algorithm>
+#include <string>
+#include <set>
+#include <vector>
+#include <string.h>
+
+using std::string;
+
+#define TESTALL(mask, var) (((mask) & (var)) == (mask))
+#define TESTANY(mask, var) (((mask) & (var)) != 0)
+
+#define CHECK_IMPL(condition, file, line)                                      \
+  do {                                                                         \
+    if (!(condition)) {                                                        \
+      dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line);     \
+      dr_abort();                                                              \
+    }                                                                          \
+  } while (0) // TODO: stacktrace
+
+#define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)
+
+#define VERBOSITY 0
+
+namespace {
+
+class ModuleData {
+public:
+  ModuleData();
+  ModuleData(const module_data_t *info);
+  // Yes, we want default copy, assign, and dtor semantics.
+
+public:
+  app_pc start_;
+  app_pc end_;
+  // Full path to the module.
+  string path_;
+  module_handle_t handle_;
+  bool should_instrument_;
+  bool executed_;
+};
+
+string g_app_path;
+
+int msan_retval_tls_offset;
+int msan_param_tls_offset;
+
+// A vector of loaded modules sorted by module bounds.  We lookup the current PC
+// in here from the bb event.  This is better than an rb tree because the lookup
+// is faster and the bb event occurs far more than the module load event.
+std::vector<ModuleData> g_module_list;
+
+ModuleData::ModuleData()
+    : start_(NULL), end_(NULL), path_(""), handle_(NULL),
+      should_instrument_(false), executed_(false) {
+}
+
+ModuleData::ModuleData(const module_data_t *info)
+    : start_(info->start), end_(info->end), path_(info->full_path),
+      handle_(info->handle),
+      // We'll check the black/white lists later and adjust this.
+      should_instrument_(true), executed_(false) {
+}
+
+int(*__msan_get_retval_tls_offset)();
+int(*__msan_get_param_tls_offset)();
+void (*__msan_unpoison)(void *base, size_t size);
+bool (*__msan_is_in_loader)();
+
+static generic_func_t LookupCallback(module_data_t *app, const char *name) {
+  generic_func_t callback = dr_get_proc_address(app->handle, name);
+  if (callback == NULL) {
+    dr_printf("Couldn't find `%s` in %s\n", name, app->full_path);
+    CHECK(callback);
+  }
+  return callback;
+}
+
+void InitializeMSanCallbacks() {
+  module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
+  if (!app) {
+    dr_printf("%s - oops, dr_lookup_module_by_name failed!\n",
+              dr_get_application_name());
+    CHECK(app);
+  }
+  g_app_path = app->full_path;
+
+  __msan_get_retval_tls_offset = (int (*)())
+      LookupCallback(app, "__msan_get_retval_tls_offset");
+  __msan_get_param_tls_offset = (int (*)())
+      LookupCallback(app, "__msan_get_param_tls_offset");
+  __msan_unpoison = (void(*)(void *, size_t))
+      LookupCallback(app, "__msan_unpoison");
+  __msan_is_in_loader = (bool (*)())
+      LookupCallback(app, "__msan_is_in_loader");
+
+  dr_free_module_data(app);
+}
+
+// FIXME: Handle absolute addresses and PC-relative addresses.
+// FIXME: Handle TLS accesses via FS or GS.  DR assumes all other segments have
+// a zero base anyway.
+bool OperandIsInteresting(opnd_t opnd) {
+  return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS &&
+          opnd_get_segment(opnd) != DR_SEG_GS);
+}
+
+bool WantToInstrument(instr_t *instr) {
+  // TODO: skip push instructions?
+  switch (instr_get_opcode(instr)) {
+    // FIXME: support the instructions excluded below:
+  case OP_rep_cmps:
+    // f3 a6    rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx
+    return false;
+  }
+
+  // Labels appear due to drutil_expand_rep_string()
+  if (instr_is_label(instr))
+    return false;
+
+  CHECK(instr_ok_to_mangle(instr) == true);
+
+  if (instr_writes_memory(instr)) {
+    for (int d = 0; d < instr_num_dsts(instr); d++) {
+      opnd_t op = instr_get_dst(instr, d);
+      if (OperandIsInteresting(op))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+#define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what);
+#define PREF(at, what) instrlist_meta_preinsert(bb, at, what);
+
+void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op,
+                    bool is_write) {
+  bool need_to_restore_eflags = false;
+  uint flags = instr_get_arith_flags(instr);
+  // TODO: do something smarter with flags and spills in general?
+  // For example, spill them only once for a sequence of instrumented
+  // instructions that don't change/read flags.
+
+  if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) {
+    if (VERBOSITY > 1)
+      dr_printf("Spilling eflags...\n");
+    need_to_restore_eflags = true;
+    // TODO: Maybe sometimes don't need to 'seto'.
+    // TODO: Maybe sometimes don't want to spill XAX here?
+    // TODO: No need to spill XAX here if XAX is not used in the BB.
+    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+    dr_save_arith_flags_to_xax(drcontext, bb, instr);
+    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
+    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+  }
+
+#if 0
+  dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n",
+            opnd_is_memory_reference(op), opnd_is_base_disp(op),
+            opnd_is_base_disp(op) ? opnd_get_index(op) : -1,
+            opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op),
+            opnd_is_base_disp(op) ? opnd_get_disp(op) : -1);
+#endif
+
+  reg_id_t R1;
+  bool address_in_R1 = false;
+  if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL &&
+      opnd_get_disp(op) == 0) {
+    // If this is a simple access with no offset or index, we can just use the
+    // base for R1.
+    address_in_R1 = true;
+    R1 = opnd_get_base(op);
+  } else {
+    // Otherwise, we need to compute the addr into R1.
+    // TODO: reuse some spare register? e.g. r15 on x64
+    // TODO: might be used as a non-mem-ref register?
+    R1 = DR_REG_XAX;
+  }
+  CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong.
+
+  // Pick R2 that's not R1 or used by the operand.  It's OK if the instr uses
+  // R2 elsewhere, since we'll restore it before instr.
+  reg_id_t GPR_TO_USE_FOR_R2[] = {
+    DR_REG_XAX, DR_REG_XBX, DR_REG_XCX, DR_REG_XDX
+    // Don't forget to update the +4 below if you add anything else!
+  };
+  std::set<reg_id_t> unused_registers(GPR_TO_USE_FOR_R2, GPR_TO_USE_FOR_R2 + 4);
+  unused_registers.erase(R1);
+  for (int j = 0; j < opnd_num_regs_used(op); j++) {
+    unused_registers.erase(opnd_get_reg_used(op, j));
+  }
+
+  CHECK(unused_registers.size() > 0);
+  reg_id_t R2 = *unused_registers.begin();
+  CHECK(R1 != R2);
+
+  // Save the current values of R1 and R2.
+  dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
+  // TODO: Something smarter than spilling a "fixed" register R2?
+  dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
+
+  if (!address_in_R1)
+    CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2));
+  PRE(instr, mov_imm(drcontext, opnd_create_reg(R2),
+                     OPND_CREATE_INT64(0xffffbfffffffffff)));
+  PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2)));
+  // There is no mov_st of a 64-bit immediate, so...
+  opnd_size_t op_size = opnd_get_size(op);
+  CHECK(op_size != OPSZ_NA);
+  uint access_size = opnd_size_in_bytes(op_size);
+  if (access_size <= 4) {
+    PRE(instr,
+        mov_st(drcontext, opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size),
+               opnd_create_immed_int((ptr_int_t) 0, op_size)));
+  } else {
+    // FIXME: tail?
+    for (uint ofs = 0; ofs < access_size; ofs += 4) {
+      PRE(instr,
+          mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), OPND_CREATE_INT32(0)));
+    }
+  }
+
+  // Restore the registers and flags.
+  dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
+  dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
+
+  if (need_to_restore_eflags) {
+    if (VERBOSITY > 1)
+      dr_printf("Restoring eflags\n");
+    // TODO: Check if it's reverse to the dr_restore_reg above and optimize.
+    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
+    dr_restore_arith_flags_from_xax(drcontext, bb, instr);
+    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+  }
+
+  // The original instruction is left untouched. The above instrumentation is just
+  // a prefix.
+}
+
+void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) {
+  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // Clobbers nothing except xax.
+  bool res =
+      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
+  CHECK(res);
+
+  // TODO: unpoison more bytes?
+  PRE(instr,
+      mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset),
+             OPND_CREATE_INT32(0)));
+
+  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // The original instruction is left untouched. The above instrumentation is just
+  // a prefix.
+}
+
+void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb,
+                              instr_t *instr) {
+  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // Clobbers nothing except xax.
+  bool res =
+      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
+  CHECK(res);
+
+  // TODO: unpoison more bytes?
+  for (int i = 0; i < 6; ++i) {
+    PRE(instr,
+        mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset +
+                                                         i * sizeof(void *)),
+               OPND_CREATE_INT32(0)));
+  }
+
+  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // The original instruction is left untouched. The above instrumentation is just
+  // a prefix.
+}
+
+// For use with binary search.  Modules shouldn't overlap, so we shouldn't have
+// to look at end_.  If that can happen, we won't support such an application.
+bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) {
+  return left.start_ < right.start_;
+}
+
+// Look up the module containing PC; returns NULL when no loaded module covers
+// it.  Should be relatively fast, as it's called for each bb instrumentation.
+ModuleData *LookupModuleByPC(app_pc pc) {
+  ModuleData fake_mod_data;
+  fake_mod_data.start_ = pc;
+  std::vector<ModuleData>::iterator it =
+      lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data,
+                  ModuleDataCompareStart);
+  if (g_module_list.empty() || pc < g_module_list.front().start_)
+    return NULL;  // No module starts at or below PC; --it would be invalid.
+  if (it == g_module_list.end() || pc < it->start_)
+    --it;
+  CHECK(it->start_ <= pc);
+  if (pc >= it->end_) {
+    // We're past the end of this module.  We shouldn't be in the next module,
+    // or lower_bound lied to us.
+    ++it;
+    CHECK(it == g_module_list.end() || pc < it->start_);
+    return NULL;
+  }
+
+  // OK, we found the module.
+  return &*it;
+}
+
+bool ShouldInstrumentNonModuleCode() { return true; }
+
+bool ShouldInstrumentModule(ModuleData *mod_data) {
+  // TODO(rnk): Flags for blacklist would get wired in here.
+  generic_func_t p =
+      dr_get_proc_address(mod_data->handle_, "__msan_track_origins");
+  return !p;
+}
+
+bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) {
+  // Resolve the owning module first; the caller may ask for it via pmod_data.
+  ModuleData *owner = LookupModuleByPC(pc);
+  if (pmod_data != NULL)
+    *pmod_data = owner;
+
+  // Code outside of any known module (LookupModuleByPC returned NULL) follows
+  // the global policy for non-module code.
+  if (owner == NULL)
+    return ShouldInstrumentNonModuleCode();
+
+  // Known module: honor its per-module flag (cleared for blacklisted modules).
+  return owner->should_instrument_;
+}
+
+// TODO(rnk): Make sure we instrument after __msan_init.
+dr_emit_flags_t
+event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb,
+                          bool for_trace, bool translating) {
+  app_pc pc = dr_fragment_app_pc(tag);
+
+  if (ShouldInstrumentPc(pc, NULL))
+    CHECK(drutil_expand_rep_string(drcontext, bb));
+
+  return DR_EMIT_PERSISTABLE;
+}
+
+dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
+                                  bool for_trace, bool translating) {
+  app_pc pc = dr_fragment_app_pc(tag);
+  ModuleData *mod_data;
+
+  if (!ShouldInstrumentPc(pc, &mod_data))
+    return DR_EMIT_PERSISTABLE;
+
+  if (VERBOSITY > 1)
+    dr_printf("============================================================\n");
+  if (VERBOSITY > 0) {
+    string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>");
+    if (mod_data && !mod_data->executed_) {
+      mod_data->executed_ = true; // Nevermind this race.
+      dr_printf("Executing from new module: %s\n", mod_path.c_str());
+    }
+    dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc,
+        mod_path.c_str(), translating ? "true" : "false");
+    if (mod_data) {
+      // Match standard sanitizer trace format for free symbols.
+      // #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
+      dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(),
+          pc - mod_data->start_);
+    }
+  }
+  if (VERBOSITY > 1) {
+    instrlist_disassemble(drcontext, pc, bb, STDOUT);
+    instr_t *instr;
+    for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) {
+      dr_printf("opcode: %d\n", instr_get_opcode(instr));
+    }
+  }
+
+  for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) {
+    int opcode = instr_get_opcode(i);
+    if (opcode == OP_ret || opcode == OP_ret_far) {
+      InstrumentReturn(drcontext, bb, i);
+      continue;
+    }
+
+    // These instructions hopefully cover all cases where control is transferred
+    // to a function in a different module (we only care about calls into
+    // compiler-instrumented modules).
+    // * call_ind is used for normal indirect calls.
+    // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT
+    //   stub includes a jump to an address from GOT).
+    if (opcode == OP_call_ind || opcode == OP_call_far_ind ||
+        opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) {
+      InstrumentIndirectBranch(drcontext, bb, i);
+      continue;
+    }
+
+    if (!WantToInstrument(i))
+      continue;
+
+    if (VERBOSITY > 1) {
+      app_pc orig_pc = dr_fragment_app_pc(tag);
+      uint flags = instr_get_arith_flags(i);
+      dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n",
+          instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags);
+    }
+
+    if (instr_writes_memory(i)) {
+      // Instrument memory writes
+      // bool instrumented_anything = false;
+      for (int d = 0; d < instr_num_dsts(i); d++) {
+        opnd_t op = instr_get_dst(i, d);
+        if (!OperandIsInteresting(op))
+          continue;
+
+        // CHECK(!instrumented_anything);
+        // instrumented_anything = true;
+        InstrumentMops(drcontext, bb, i, op, true);
+        break; // only instrumenting the first dst
+      }
+    }
+  }
+
+// TODO: optimize away redundant restore-spill pairs?
+
+  if (VERBOSITY > 1) {
+    pc = dr_fragment_app_pc(tag);
+    dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc);
+    instrlist_disassemble(drcontext, pc, bb, STDOUT);
+  }
+  return DR_EMIT_PERSISTABLE;
+}
+
+void event_module_load(void *drcontext, const module_data_t *info,
+                       bool loaded) {
+  // Insert the module into the list while maintaining the ordering.
+  ModuleData mod_data(info);
+  std::vector<ModuleData>::iterator it =
+      upper_bound(g_module_list.begin(), g_module_list.end(), mod_data,
+                  ModuleDataCompareStart);
+  it = g_module_list.insert(it, mod_data);
+  // Check if we should instrument this module.
+  it->should_instrument_ = ShouldInstrumentModule(&*it);
+  dr_module_set_should_instrument(info->handle, it->should_instrument_);
+
+  if (VERBOSITY > 0)
+    dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n",
+        info->full_path, info->start, info->end,
+        it->should_instrument_ ? "on" : "off");
+}
+
+void event_module_unload(void *drcontext, const module_data_t *info) {
+  if (VERBOSITY > 0)
+    dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path,
+        info->start, info->end);
+
+  // Remove the module from the list.
+  ModuleData mod_data(info);
+  std::vector<ModuleData>::iterator it =
+      lower_bound(g_module_list.begin(), g_module_list.end(), mod_data,
+                  ModuleDataCompareStart);
+  // It's a bug if we didn't actually find the module.
+  CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ &&
+        it->end_ == mod_data.end_ && it->path_ == mod_data.path_);
+  g_module_list.erase(it);
+}
+
+void event_exit() {
+  if (VERBOSITY > 0)
+    dr_printf("==DRMSAN== DONE\n");
+}
+
+bool event_filter_syscall(void *drcontext, int sysnum) {
+  // FIXME: only intercept syscalls with memory effects.
+  return true; /* intercept everything */
+}
+
+bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
+  CHECK(arg->valid);
+
+  if (arg->pre)
+    return true;  // presumably the pre-syscall pass: nothing written yet.
+  if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
+    return true;  // Not an output argument, so there is nothing to unpoison.
+
+  size_t sz = arg->size;
+
+  if (sz > 0xFFFFFFFF) {  // NOTE(review): only logs; sz itself is not clipped.
+    drmf_status_t res;
+    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
+    const char *name;
+    res = drsys_syscall_name(syscall, &name);
+    CHECK(res == DRMF_SUCCESS);
+
+    dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!"
+              " Clipping to %llu.\n",
+              name, arg->ordinal, (unsigned long long) sz,
+              (unsigned long long)(sz & 0xFFFFFFFF));
+  }
+
+  if (VERBOSITY > 0) {
+    drmf_status_t res;
+    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
+    const char *name;
+    res = drsys_syscall_name(syscall, &name);  // Result is not checked here.
+    dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
+              name, arg->ordinal, arg->start_addr,
+              (char *)arg->start_addr + sz);
+  }
+
+  // We don't switch to the app context because __msan_unpoison() doesn't need
+  // TLS segments.
+  __msan_unpoison(arg->start_addr, sz);
+
+  return true; /* keep going */
+}
+
+bool event_pre_syscall(void *drcontext, int sysnum) {
+  drsys_syscall_t *syscall;
+  drsys_sysnum_t sysnum_full;
+  bool known;
+  drsys_param_type_t ret_type;
+  drmf_status_t res;
+  const char *name;
+
+  res = drsys_cur_syscall(drcontext, &syscall);
+  CHECK(res == DRMF_SUCCESS);
+
+  res = drsys_syscall_number(syscall, &sysnum_full);
+  CHECK(res == DRMF_SUCCESS);
+  CHECK(sysnum == sysnum_full.number);
+
+  res = drsys_syscall_is_known(syscall, &known);
+  CHECK(res == DRMF_SUCCESS);
+
+  res = drsys_syscall_name(syscall, &name);
+  CHECK(res == DRMF_SUCCESS);
+
+  res = drsys_syscall_return_type(syscall, &ret_type);
+  CHECK(res == DRMF_SUCCESS);
+  CHECK(ret_type != DRSYS_TYPE_INVALID);
+  CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN);
+
+  res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL);
+  CHECK(res == DRMF_SUCCESS);
+
+  return true;
+}
+
+static bool IsInLoader(void *drcontext) {
+  // TODO: This segment swap is inefficient.  DR should just let us query the
+  // app segment base, which it has.  Alternatively, if we disable
+  // -mangle_app_seg, then we won't need the swap.
+  bool need_swap = !dr_using_app_state(drcontext);
+  if (need_swap)
+    dr_switch_to_app_state(drcontext);
+  bool is_in_loader = __msan_is_in_loader();
+  if (need_swap)
+    dr_switch_to_dr_state(drcontext);
+  return is_in_loader;
+}
+
+void event_post_syscall(void *drcontext, int sysnum) {
+  drsys_syscall_t *syscall;
+  drsys_sysnum_t sysnum_full;
+  bool success = false;
+  drmf_status_t res;
+
+  res = drsys_cur_syscall(drcontext, &syscall);
+  CHECK(res == DRMF_SUCCESS);
+
+  res = drsys_syscall_number(syscall, &sysnum_full);
+  CHECK(res == DRMF_SUCCESS);
+  CHECK(sysnum == sysnum_full.number);
+
+  res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext),
+                                &success);
+  CHECK(res == DRMF_SUCCESS);
+
+  if (success) {
+    res =
+        drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
+    CHECK(res == DRMF_SUCCESS);
+  }
+
+  // Our normal mmap interceptor can't intercept calls from the loader itself.
+  // This means we don't clear the shadow for calls to dlopen.  For now, we
+  // solve this by intercepting mmap from ld.so here, but ideally we'd have a
+  // solution that doesn't rely on msandr.
+  //
+  // Be careful not to intercept maps done by the msan rtl.  Otherwise we end up
+  // unpoisoning vast regions of memory and OOMing.
+  // TODO: __msan_unpoison() could "flush" large regions of memory like tsan
+  // does instead of doing a large memset.  However, we need the memory to be
+  // zeroed, whereas tsan does not, so plain madvise is not enough.
+  if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) {
+    if (IsInLoader(drcontext)) {
+      app_pc base = (app_pc)dr_syscall_get_result(drcontext);
+      ptr_uint_t size = 0;
+      res = drsys_pre_syscall_arg(drcontext, 1, &size);  // Reuse outer res.
+      CHECK(res == DRMF_SUCCESS);
+      if (VERBOSITY > 0)
+        dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size);
+      // We don't switch to the app context because __msan_unpoison() doesn't
+      // need TLS segments.
+      __msan_unpoison(base, size);
+    }
+  }
+}
+
+} // namespace
+
+DR_EXPORT void dr_init(client_id_t id) {
+  drmf_status_t res;
+
+  drmgr_init();
+  drutil_init();
+
+  string app_name = dr_get_application_name();
+  // This blacklist will still run these apps through DR's code cache.  On the
+  // other hand, we are able to follow children of these apps.
+  // FIXME: Once DR has detach, we could just detach here.  Alternatively,
+  // if DR had a fork or exec hook to let us decide there, that would be nice.
+  // FIXME: make the blacklist cmd-adjustable.
+  if (app_name == "python" || app_name == "python2.7" || app_name == "bash" ||
+      app_name == "sh" || app_name == "true" || app_name == "exit" ||
+      app_name == "yes" || app_name == "echo")
+    return;  // Blacklisted app: no events below are registered.
+
+  drsys_options_t ops;
+  memset(&ops, 0, sizeof(ops));
+  ops.struct_size = sizeof(ops);
+  ops.analyze_unknown_syscalls = false;
+
+  res = drsys_init(id, &ops);
+  CHECK(res == DRMF_SUCCESS);
+
+  dr_register_filter_syscall_event(event_filter_syscall);
+  drmgr_register_pre_syscall_event(event_pre_syscall);
+  drmgr_register_post_syscall_event(event_post_syscall);
+  res = drsys_filter_all_syscalls();
+  CHECK(res == DRMF_SUCCESS);
+
+  InitializeMSanCallbacks();
+
+  // FIXME: the shadow is initialized earlier when DR calls one of our wrapper
+  // functions. This may change one day.
+  // TODO: make this more robust.
+
+  void *drcontext = dr_get_current_drcontext();
+
+  dr_switch_to_app_state(drcontext);  // The offset getters run in app state.
+  msan_retval_tls_offset = __msan_get_retval_tls_offset();
+  msan_param_tls_offset = __msan_get_param_tls_offset();
+  dr_switch_to_dr_state(drcontext);
+  if (VERBOSITY > 0) {
+    dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset);
+    dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset);
+  }
+
+  // Standard DR events.
+  dr_register_exit_event(event_exit);
+
+  drmgr_priority_t priority = {
+    sizeof(priority), /* size of struct */
+    "msandr",         /* name of our operation */
+    NULL,             /* optional name of operation we should precede */
+    NULL,             /* optional name of operation we should follow */
+    0                 /* numeric priority */
+  };
+
+  drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority);
+  drmgr_register_bb_instru2instru_event(event_basic_block, &priority);
+  drmgr_register_module_load_event(event_module_load);
+  drmgr_register_module_unload_event(event_module_unload);
+  if (VERBOSITY > 0)
+    dr_printf("==MSANDR== Starting!\n");
+}
diff --git a/lib/profile/GCDAProfiling.c b/lib/profile/GCDAProfiling.c
index 7c52a17..c7f74fd 100644
--- a/lib/profile/GCDAProfiling.c
+++ b/lib/profile/GCDAProfiling.c
@@ -145,7 +145,7 @@
  * profiling enabled will emit to a different file. Only one file may be
  * started at a time.
  */
-void llvm_gcda_start_file(const char *orig_filename) {
+void llvm_gcda_start_file(const char *orig_filename, const char version[4]) {
   char *filename = mangle_filename(orig_filename);
 
   /* Try just opening the file. */
@@ -167,13 +167,10 @@
     }
   }
 
-  /* gcda file, version 404*, stamp LLVM. */
-#ifdef __APPLE__
-  fwrite("adcg*204MVLL", 12, 1, output_file);
-#else
-  fwrite("adcg*404MVLL", 12, 1, output_file);
-#endif
-
+  /* gcda file, version, stamp LLVM. */
+  fwrite("adcg", 4, 1, output_file);
+  fwrite(version, 4, 1, output_file);
+  fwrite("MVLL", 4, 1, output_file);
   free(filename);
 
 #ifdef DEBUG_GCDAPROFILING
@@ -206,19 +203,28 @@
 #endif
 }
 
-void llvm_gcda_emit_function(uint32_t ident, const char *function_name) {
+void llvm_gcda_emit_function(uint32_t ident, const char *function_name,
+                             uint8_t use_extra_checksum) {
+  uint32_t len = 2;
+  if (use_extra_checksum)
+    len++;
 #ifdef DEBUG_GCDAPROFILING
-  fprintf(stderr, "llvmgcda: function id=0x%08x\n", ident);
+  fprintf(stderr, "llvmgcda: function id=0x%08x name=%s\n", ident,
+          function_name ? function_name : "NULL");
 #endif
   if (!output_file) return;
 
-  /* function tag */  
+  /* function tag */
   fwrite("\0\0\0\1", 4, 1, output_file);
-  write_int32(3 + 1 + length_of_string(function_name));
+  if (function_name)
+    len += 1 + length_of_string(function_name);
+  write_int32(len);
   write_int32(ident);
   write_int32(0);
-  write_int32(0);
-  write_string(function_name);
+  if (use_extra_checksum)
+    write_int32(0);
+  if (function_name)
+    write_string(function_name);
 }
 
 void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters) {
diff --git a/lib/sanitizer_common/CMakeLists.txt b/lib/sanitizer_common/CMakeLists.txt
index f346163..b762855 100644
--- a/lib/sanitizer_common/CMakeLists.txt
+++ b/lib/sanitizer_common/CMakeLists.txt
@@ -8,59 +8,72 @@
   sanitizer_libc.cc
   sanitizer_linux.cc
   sanitizer_mac.cc
+  sanitizer_platform_limits_posix.cc
   sanitizer_posix.cc
   sanitizer_printf.cc
   sanitizer_stackdepot.cc
   sanitizer_stacktrace.cc
   sanitizer_symbolizer.cc
+  sanitizer_symbolizer_itanium.cc
   sanitizer_symbolizer_linux.cc
   sanitizer_symbolizer_mac.cc
   sanitizer_symbolizer_win.cc
   sanitizer_win.cc
   )
 
+# Explicitly list all sanitizer_common headers. Not all of these are
+# included in sanitizer_common source files, but we need to depend on
+# headers when building our custom unit tests.
+set(SANITIZER_HEADERS
+  sanitizer_allocator.h
+  sanitizer_atomic_clang.h
+  sanitizer_atomic_msvc.h
+  sanitizer_atomic.h
+  sanitizer_common.h
+  sanitizer_common_interceptors.inc
+  sanitizer_common_interceptors_scanf.inc
+  sanitizer_flags.h
+  sanitizer_internal_defs.h
+  sanitizer_lfstack.h
+  sanitizer_libc.h
+  sanitizer_linux.h
+  sanitizer_list.h
+  sanitizer_mutex.h
+  sanitizer_placement_new.h
+  sanitizer_platform_interceptors.h
+  sanitizer_procmaps.h
+  sanitizer_quarantine.h
+  sanitizer_report_decorator.h
+  sanitizer_stackdepot.h
+  sanitizer_stacktrace.h
+  sanitizer_symbolizer.h
+  )
+
 set(SANITIZER_CFLAGS ${SANITIZER_COMMON_CFLAGS})
 
 set(SANITIZER_RUNTIME_LIBRARIES)
 if(APPLE)
   # Build universal binary on APPLE.
-  add_library(RTSanitizerCommon.osx OBJECT ${SANITIZER_SOURCES})
-  set_target_compile_flags(RTSanitizerCommon.osx ${SANITIZER_CFLAGS})
-  filter_available_targets(SANITIZER_TARGETS x86_64 i386)
-  set_target_properties(RTSanitizerCommon.osx PROPERTIES
-    OSX_ARCHITECTURES "${SANITIZER_TARGETS}")
+  add_compiler_rt_osx_object_library(RTSanitizerCommon
+    ARCH ${SANITIZER_COMMON_SUPPORTED_ARCH}
+    SOURCES ${SANITIZER_SOURCES}
+    CFLAGS ${SANITIZER_CFLAGS})
   list(APPEND SANITIZER_RUNTIME_LIBRARIES RTSanitizerCommon.osx)
+elseif(ANDROID)
+  add_library(RTSanitizerCommon.arm.android OBJECT ${SANITIZER_SOURCES})
+  set_target_compile_flags(RTSanitizerCommon.arm.android
+    ${SANITIZER_CFLAGS})
+  list(APPEND SANITIZER_RUNTIME_LIBRARIES RTSanitizerCommon.arm.android)
 else()
   # Otherwise, build separate libraries for each target.
-  if(CAN_TARGET_X86_64)
-    add_library(RTSanitizerCommon.x86_64 OBJECT ${SANITIZER_SOURCES})
-    set_target_compile_flags(RTSanitizerCommon.x86_64
-      ${SANITIZER_CFLAGS} ${TARGET_X86_64_CFLAGS})
-    list(APPEND SANITIZER_RUNTIME_LIBRARIES RTSanitizerCommon.x86_64)
-  endif()
-  if(CAN_TARGET_I386)
-    add_library(RTSanitizerCommon.i386 OBJECT ${SANITIZER_SOURCES})
-    set_target_compile_flags(RTSanitizerCommon.i386
-      ${SANITIZER_CFLAGS} ${TARGET_I386_CFLAGS})
-    list(APPEND SANITIZER_RUNTIME_LIBRARIES RTSanitizerCommon.i386)
-  endif()
-  if(ANDROID)
-    add_library(RTSanitizerCommon.arm.android OBJECT ${SANITIZER_SOURCES})
-    set_target_compile_flags(RTSanitizerCommon.arm.android
-      ${SANITIZER_CFLAGS})
-    list(APPEND SANITIZER_RUNTIME_LIBRARIES RTSanitizerCommon.arm.android)
-  endif()
+  foreach(arch ${SANITIZER_COMMON_SUPPORTED_ARCH})
+    add_compiler_rt_object_library(RTSanitizerCommon ${arch}
+      SOURCES ${SANITIZER_SOURCES} CFLAGS ${SANITIZER_CFLAGS})
+    list(APPEND SANITIZER_RUNTIME_LIBRARIES RTSanitizerCommon.${arch})
+  endforeach()
 endif()
 
 # Unit tests for common sanitizer runtime.
 if(LLVM_INCLUDE_TESTS)
-  # Build stand-alone static sanitizer runtime for use in unit tests.
-  add_library(RTSanitizerCommon.test STATIC ${SANITIZER_SOURCES})
-  # Build test library with debug info.
-  set_target_compile_flags(RTSanitizerCommon.test
-    ${SANITIZER_CFLAGS} -g)
-  set_target_properties(RTSanitizerCommon.test PROPERTIES
-    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
-
   add_subdirectory(tests)
 endif()
diff --git a/lib/sanitizer_common/sanitizer_allocator.cc b/lib/sanitizer_common/sanitizer_allocator.cc
index b13a7c6..88a3a1b 100644
--- a/lib/sanitizer_common/sanitizer_allocator.cc
+++ b/lib/sanitizer_common/sanitizer_allocator.cc
@@ -75,4 +75,10 @@
   low_level_alloc_callback = callback;
 }
 
+bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n) {
+  if (!size) return false;
+  uptr max = (uptr)-1L;
+  return (max / size) < n;
+}
+
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_allocator.h b/lib/sanitizer_common/sanitizer_allocator.h
index 6325088..6321d31 100644
--- a/lib/sanitizer_common/sanitizer_allocator.h
+++ b/lib/sanitizer_common/sanitizer_allocator.h
@@ -19,113 +19,273 @@
 #include "sanitizer_libc.h"
 #include "sanitizer_list.h"
 #include "sanitizer_mutex.h"
+#include "sanitizer_lfstack.h"
 
 namespace __sanitizer {
 
-// Maps size class id to size and back.
-template <uptr l0, uptr l1, uptr l2, uptr l3, uptr l4, uptr l5,
-          uptr s0, uptr s1, uptr s2, uptr s3, uptr s4,
-          uptr c0, uptr c1, uptr c2, uptr c3, uptr c4>
-class SplineSizeClassMap {
- private:
-  // Here we use a spline composed of 5 polynomials of oder 1.
-  // The first size class is l0, then the classes go with step s0
-  // untill they reach l1, after which they go with step s1 and so on.
-  // Steps should be powers of two for cheap division.
-  // The size of the last size class should be a power of two.
-  // There should be at most 256 size classes.
-  static const uptr u0 = 0  + (l1 - l0) / s0;
-  static const uptr u1 = u0 + (l2 - l1) / s1;
-  static const uptr u2 = u1 + (l3 - l2) / s2;
-  static const uptr u3 = u2 + (l4 - l3) / s3;
-  static const uptr u4 = u3 + (l5 - l4) / s4;
+// SizeClassMap maps allocation sizes into size classes and back.
+// Class 0 corresponds to size 0.
+// Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
+// Next 4 classes: 256 + i * 64  (i = 1 to 4).
+// Next 4 classes: 512 + i * 128 (i = 1 to 4).
+// ...
+// Next 4 classes: 2^k + i * 2^(k-2) (i = 1 to 4).
+// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
+//
+// This structure of the size class map gives us:
+//   - Efficient table-free class-to-size and size-to-class functions.
+//   - Difference between two consecutive size classes is between 14% and 25%
+//
+// This class also gives a hint to a thread-caching allocator about the number
+// of chunks that need to be cached per-thread:
+//  - kMaxNumCached is the maximal number of chunks per size class.
+//  - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
+//
+// Part of output of SizeClassMap::Print():
+// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
+// c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
+// c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2
+// c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3
+// c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4
+// c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5
+// c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6
+// c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7
+//
+// c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8
+// c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9
+// c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10
+// c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11
+// c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12
+// c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13
+// c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14
+// c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15
+//
+// c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16
+// c17 => s: 320 diff: +64 25% l 8 cached: 204 65280; id 17
+// c18 => s: 384 diff: +64 20% l 8 cached: 170 65280; id 18
+// c19 => s: 448 diff: +64 16% l 8 cached: 146 65408; id 19
+//
+// c20 => s: 512 diff: +64 14% l 9 cached: 128 65536; id 20
+// c21 => s: 640 diff: +128 25% l 9 cached: 102 65280; id 21
+// c22 => s: 768 diff: +128 20% l 9 cached: 85 65280; id 22
+// c23 => s: 896 diff: +128 16% l 9 cached: 73 65408; id 23
+//
+// c24 => s: 1024 diff: +128 14% l 10 cached: 64 65536; id 24
+// c25 => s: 1280 diff: +256 25% l 10 cached: 51 65280; id 25
+// c26 => s: 1536 diff: +256 20% l 10 cached: 42 64512; id 26
+// c27 => s: 1792 diff: +256 16% l 10 cached: 36 64512; id 27
+//
+// ...
+//
+// c48 => s: 65536 diff: +8192 14% l 16 cached: 1 65536; id 48
+// c49 => s: 81920 diff: +16384 25% l 16 cached: 1 81920; id 49
+// c50 => s: 98304 diff: +16384 20% l 16 cached: 1 98304; id 50
+// c51 => s: 114688 diff: +16384 16% l 16 cached: 1 114688; id 51
+//
+// c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52
+
+template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog>
+class SizeClassMap {
+  static const uptr kMinSizeLog = 4;
+  static const uptr kMidSizeLog = kMinSizeLog + 4;
+  static const uptr kMinSize = 1 << kMinSizeLog;
+  static const uptr kMidSize = 1 << kMidSizeLog;
+  static const uptr kMidClass = kMidSize / kMinSize;
+  static const uptr S = 2;
+  static const uptr M = (1 << S) - 1;
 
  public:
-  // The number of size classes should be a power of two for fast division.
-  static const uptr kNumClasses = u4 + 1;
-  static const uptr kMaxSize = l5;
-  static const uptr kMinSize = l0;
+  static const uptr kMaxNumCached = kMaxNumCachedT;
+  // We transfer chunks between central and thread-local free lists in batches.
+  // For small size classes we allocate batches separately.
+  // For large size classes we use one of the chunks to store the batch.
+  struct TransferBatch {
+    TransferBatch *next;
+    uptr count;
+    void *batch[kMaxNumCached];
+  };
 
-  COMPILER_CHECK(kNumClasses <= 256);
-  COMPILER_CHECK((kNumClasses & (kNumClasses - 1)) == 0);
-  COMPILER_CHECK((kMaxSize & (kMaxSize - 1)) == 0);
+  static const uptr kMaxSize = 1 << kMaxSizeLog;
+  static const uptr kNumClasses =
+      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
+  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
+  static const uptr kNumClassesRounded =
+      kNumClasses == 32  ? 32 :
+      kNumClasses <= 64  ? 64 :
+      kNumClasses <= 128 ? 128 : 256;
 
   static uptr Size(uptr class_id) {
-    if (class_id <= u0) return l0 + s0 * (class_id - 0);
-    if (class_id <= u1) return l1 + s1 * (class_id - u0);
-    if (class_id <= u2) return l2 + s2 * (class_id - u1);
-    if (class_id <= u3) return l3 + s3 * (class_id - u2);
-    if (class_id <= u4) return l4 + s4 * (class_id - u3);
-    return 0;
+    if (class_id <= kMidClass)
+      return kMinSize * class_id;
+    class_id -= kMidClass;
+    uptr t = kMidSize << (class_id >> S);
+    return t + (t >> S) * (class_id & M);
   }
+
   static uptr ClassID(uptr size) {
-    if (size <= l1) return 0  + (size - l0 + s0 - 1) / s0;
-    if (size <= l2) return u0 + (size - l1 + s1 - 1) / s1;
-    if (size <= l3) return u1 + (size - l2 + s2 - 1) / s2;
-    if (size <= l4) return u2 + (size - l3 + s3 - 1) / s3;
-    if (size <= l5) return u3 + (size - l4 + s4 - 1) / s4;
-    return 0;
+    if (size <= kMidSize)
+      return (size + kMinSize - 1) >> kMinSizeLog;
+    if (size > kMaxSize) return 0;
+    uptr l = MostSignificantSetBitIndex(size);
+    uptr hbits = (size >> (l - S)) & M;
+    uptr lbits = size & ((1 << (l - S)) - 1);
+    uptr l1 = l - kMidSizeLog;
+    return kMidClass + (l1 << S) + hbits + (lbits > 0);
   }
 
   static uptr MaxCached(uptr class_id) {
-    if (class_id <= u0) return c0;
-    if (class_id <= u1) return c1;
-    if (class_id <= u2) return c2;
-    if (class_id <= u3) return c3;
-    if (class_id <= u4) return c4;
-    return 0;
+    if (class_id == 0) return 0;
+    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
+    return Max<uptr>(1, Min(kMaxNumCached, n));
   }
-};
 
-class DefaultSizeClassMap: public SplineSizeClassMap<
-  /* l: */1 << 4, 1 << 9,  1 << 12, 1 << 15, 1 << 18, 1 << 21,
-  /* s: */1 << 4, 1 << 6,  1 << 9,  1 << 12, 1 << 15,
-  /* c: */256,    64,      16,      4,       1> {
- private:
-  COMPILER_CHECK(kNumClasses == 256);
-};
-
-class CompactSizeClassMap: public SplineSizeClassMap<
-  /* l: */1 << 3, 1 << 4,  1 << 7, 1 << 8, 1 << 12, 1 << 15,
-  /* s: */1 << 3, 1 << 4,  1 << 7, 1 << 8, 1 << 12,
-  /* c: */256,    64,      16,      4,       1> {
- private:
-  COMPILER_CHECK(kNumClasses <= 32);
-};
-
-struct AllocatorListNode {
-  AllocatorListNode *next;
-};
-
-typedef IntrusiveList<AllocatorListNode> AllocatorFreeList;
-
-// Move at most max_count chunks from allocate_from to allocate_to.
-// This function is better be a method of AllocatorFreeList, but we can't
-// inherit it from IntrusiveList as the ancient gcc complains about non-PODness.
-static inline void BulkMove(uptr max_count,
-                            AllocatorFreeList *allocate_from,
-                            AllocatorFreeList *allocate_to) {
-  CHECK(!allocate_from->empty());
-  CHECK(allocate_to->empty());
-  if (allocate_from->size() <= max_count) {
-    allocate_to->append_front(allocate_from);
-    CHECK(allocate_from->empty());
-  } else {
-    for (uptr i = 0; i < max_count; i++) {
-      AllocatorListNode *node = allocate_from->front();
-      allocate_from->pop_front();
-      allocate_to->push_front(node);
+  static void Print() {
+    uptr prev_s = 0;
+    uptr total_cached = 0;
+    for (uptr i = 0; i < kNumClasses; i++) {
+      uptr s = Size(i);
+      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
+        Printf("\n");
+      uptr d = s - prev_s;
+      uptr p = prev_s ? (d * 100 / prev_s) : 0;
+      uptr l = s ? MostSignificantSetBitIndex(s) : 0;
+      uptr cached = MaxCached(i) * s;
+      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
+             "cached: %zd %zd; id %zd\n",
+             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
+      total_cached += cached;
+      prev_s = s;
     }
-    CHECK(!allocate_from->empty());
+    Printf("Total cached: %zd\n", total_cached);
   }
-  CHECK(!allocate_to->empty());
-}
+
+  static bool SizeClassRequiresSeparateTransferBatch(uptr class_id) {
+    return Size(class_id) < sizeof(TransferBatch) -
+        sizeof(uptr) * (kMaxNumCached - MaxCached(class_id));
+  }
+
+  static void Validate() {
+    for (uptr c = 1; c < kNumClasses; c++) {
+      // Printf("Validate: c%zd\n", c);
+      uptr s = Size(c);
+      CHECK_NE(s, 0U);
+      CHECK_EQ(ClassID(s), c);
+      if (c != kNumClasses - 1)
+        CHECK_EQ(ClassID(s + 1), c + 1);
+      CHECK_EQ(ClassID(s - 1), c);
+      if (c)
+        CHECK_GT(Size(c), Size(c-1));
+    }
+    CHECK_EQ(ClassID(kMaxSize + 1), 0);
+
+    for (uptr s = 1; s <= kMaxSize; s++) {
+      uptr c = ClassID(s);
+      // Printf("s%zd => c%zd\n", s, c);
+      CHECK_LT(c, kNumClasses);
+      CHECK_GE(Size(c), s);
+      if (c > 0)
+        CHECK_LT(Size(c-1), s);
+    }
+  }
+};
+
+typedef SizeClassMap<17, 256, 16> DefaultSizeClassMap;
+typedef SizeClassMap<17, 64,  14> CompactSizeClassMap;
+template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
+
+// Memory allocator statistics
+enum AllocatorStat {
+  AllocatorStatMalloced,
+  AllocatorStatFreed,
+  AllocatorStatMmapped,
+  AllocatorStatUnmapped,
+  AllocatorStatCount
+};
+
+typedef u64 AllocatorStatCounters[AllocatorStatCount];
+
+// Per-thread stats, live in per-thread cache.
+class AllocatorStats {
+ public:
+  void Init() {
+    internal_memset(this, 0, sizeof(*this));
+  }
+
+  void Add(AllocatorStat i, u64 v) {
+    v += atomic_load(&stats_[i], memory_order_relaxed);
+    atomic_store(&stats_[i], v, memory_order_relaxed);
+  }
+
+  void Set(AllocatorStat i, u64 v) {
+    atomic_store(&stats_[i], v, memory_order_relaxed);
+  }
+
+  u64 Get(AllocatorStat i) const {
+    return atomic_load(&stats_[i], memory_order_relaxed);
+  }
+
+ private:
+  friend class AllocatorGlobalStats;
+  AllocatorStats *next_;
+  AllocatorStats *prev_;
+  atomic_uint64_t stats_[AllocatorStatCount];
+};
+
+// Global stats, used for aggregation and querying.
+class AllocatorGlobalStats : public AllocatorStats {
+ public:
+  void Init() {
+    internal_memset(this, 0, sizeof(*this));
+    next_ = this;
+    prev_ = this;
+  }
+
+  void Register(AllocatorStats *s) {
+    SpinMutexLock l(&mu_);
+    s->next_ = next_;
+    s->prev_ = this;
+    next_->prev_ = s;
+    next_ = s;
+  }
+
+  void Unregister(AllocatorStats *s) {
+    SpinMutexLock l(&mu_);
+    s->prev_->next_ = s->next_;
+    s->next_->prev_ = s->prev_;
+    for (int i = 0; i < AllocatorStatCount; i++)
+      Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
+  }
+
+  void Get(AllocatorStatCounters s) const {
+    internal_memset(s, 0, AllocatorStatCount * sizeof(u64));
+    SpinMutexLock l(&mu_);
+    const AllocatorStats *stats = this;
+    for (;;) {
+      for (int i = 0; i < AllocatorStatCount; i++)
+        s[i] += stats->Get(AllocatorStat(i));
+      stats = stats->next_;
+      if (stats == this)
+        break;
+    }
+  }
+
+ private:
+  mutable SpinMutex mu_;
+};
+
+// Allocators call these callbacks on mmap/munmap.
+struct NoOpMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const { }
+  void OnUnmap(uptr p, uptr size) const { }
+};
 
 // SizeClassAllocator64 -- allocator for 64-bit address space.
 //
 // Space: a portion of address space of kSpaceSize bytes starting at
 // a fixed address (kSpaceBeg). Both constants are powers of two and
 // kSpaceBeg is kSpaceSize-aligned.
+// At the beginning the entire space is mprotect-ed, then small parts of it
+// are mapped on demand.
 //
 // Region: a part of Space dedicated to a single size class.
 // There are kNumClasses Regions of equal size.
@@ -136,47 +296,52 @@
 // A Region looks like this:
 // UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
 template <const uptr kSpaceBeg, const uptr kSpaceSize,
-          const uptr kMetadataSize, class SizeClassMap>
+          const uptr kMetadataSize, class SizeClassMap,
+          class MapUnmapCallback = NoOpMapUnmapCallback>
 class SizeClassAllocator64 {
  public:
+  typedef typename SizeClassMap::TransferBatch Batch;
+  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
+      SizeClassMap, MapUnmapCallback> ThisT;
+  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
+
   void Init() {
-    CHECK_EQ(AllocBeg(), reinterpret_cast<uptr>(MmapFixedNoReserve(
-             AllocBeg(), AllocSize())));
+    CHECK_EQ(kSpaceBeg,
+             reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
+    MapWithCallback(kSpaceEnd, AdditionalSize());
   }
 
-  bool CanAllocate(uptr size, uptr alignment) {
+  void MapWithCallback(uptr beg, uptr size) {
+    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
+    MapUnmapCallback().OnMap(beg, size);
+  }
+
+  void UnmapWithCallback(uptr beg, uptr size) {
+    MapUnmapCallback().OnUnmap(beg, size);
+    UnmapOrDie(reinterpret_cast<void *>(beg), size);
+  }
+
+  static bool CanAllocate(uptr size, uptr alignment) {
     return size <= SizeClassMap::kMaxSize &&
       alignment <= SizeClassMap::kMaxSize;
   }
 
-  void *Allocate(uptr size, uptr alignment) {
-    if (size < alignment) size = alignment;
-    CHECK(CanAllocate(size, alignment));
-    return AllocateBySizeClass(ClassID(size));
-  }
-
-  void Deallocate(void *p) {
-    CHECK(PointerIsMine(p));
-    DeallocateBySizeClass(p, GetSizeClass(p));
-  }
-
-  // Allocate several chunks of the given class_id.
-  void BulkAllocate(uptr class_id, AllocatorFreeList *free_list) {
+  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
+                                uptr class_id) {
     CHECK_LT(class_id, kNumClasses);
     RegionInfo *region = GetRegionInfo(class_id);
-    SpinMutexLock l(&region->mutex);
-    if (region->free_list.empty()) {
-      PopulateFreeList(class_id, region);
-    }
-    BulkMove(SizeClassMap::MaxCached(class_id), &region->free_list, free_list);
+    Batch *b = region->free_list.Pop();
+    if (b == 0)
+      b = PopulateFreeList(stat, c, class_id, region);
+    region->n_allocated += b->count;
+    return b;
   }
 
-  // Swallow the entire free_list for the given class_id.
-  void BulkDeallocate(uptr class_id, AllocatorFreeList *free_list) {
-    CHECK_LT(class_id, kNumClasses);
+  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
     RegionInfo *region = GetRegionInfo(class_id);
-    SpinMutexLock l(&region->mutex);
-    region->free_list.append_front(free_list);
+    CHECK_GT(b->count, 0);
+    region->free_list.Push(b);
+    region->n_freed += b->count;
   }
 
   static bool PointerIsMine(void *p) {
@@ -184,16 +349,22 @@
   }
 
   static uptr GetSizeClass(void *p) {
-    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClasses;
+    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
   }
 
-  static void *GetBlockBegin(void *p) {
+  void *GetBlockBegin(void *p) {
     uptr class_id = GetSizeClass(p);
     uptr size = SizeClassMap::Size(class_id);
+    if (!size) return 0;
     uptr chunk_idx = GetChunkIdx((uptr)p, size);
     uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
-    uptr begin = reg_beg + chunk_idx * size;
-    return reinterpret_cast<void*>(begin);
+    uptr beg = chunk_idx * size;
+    uptr next_beg = beg + size;
+    if (class_id >= kNumClasses) return 0;
+    RegionInfo *region = GetRegionInfo(class_id);
+    if (region->mapped_user >= next_beg)
+      return reinterpret_cast<void*>(reg_beg + beg);
+    return 0;
   }
 
   static uptr GetActuallyAllocatedSize(void *p) {
@@ -220,38 +391,80 @@
 
   // Test-only.
   void TestOnlyUnmap() {
-    UnmapOrDie(reinterpret_cast<void*>(AllocBeg()), AllocSize());
+    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
   }
 
-  static uptr AllocBeg()  { return kSpaceBeg; }
-  static uptr AllocSize() { return kSpaceSize + AdditionalSize(); }
+  void PrintStats() {
+    uptr total_mapped = 0;
+    uptr n_allocated = 0;
+    uptr n_freed = 0;
+    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
+      RegionInfo *region = GetRegionInfo(class_id);
+      total_mapped += region->mapped_user;
+      n_allocated += region->n_allocated;
+      n_freed += region->n_freed;
+    }
+    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
+           "remains %zd\n",
+           total_mapped >> 20, n_allocated, n_allocated - n_freed);
+    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
+      RegionInfo *region = GetRegionInfo(class_id);
+      if (region->mapped_user == 0) continue;
+      Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
+             class_id,
+             SizeClassMap::Size(class_id),
+             region->mapped_user >> 10,
+             region->n_allocated,
+             region->n_allocated - region->n_freed);
+    }
+  }
+
+  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
+  // introspection API.
+  void ForceLock() {
+    for (uptr i = 0; i < kNumClasses; i++) {
+      GetRegionInfo(i)->mutex.Lock();
+    }
+  }
+
+  void ForceUnlock() {
+    for (int i = (int)kNumClasses - 1; i >= 0; i--) {
+      GetRegionInfo(i)->mutex.Unlock();
+    }
+  }
 
   typedef SizeClassMap SizeClassMapT;
-  static const uptr kNumClasses = SizeClassMap::kNumClasses;  // 2^k <= 256
+  static const uptr kNumClasses = SizeClassMap::kNumClasses;
+  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;
 
  private:
-  static const uptr kRegionSize = kSpaceSize / kNumClasses;
+  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
+  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
   COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
   // kRegionSize must be >= 2^32.
   COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
   // Populate the free list with at most this number of bytes at once
   // or with one element if its size is greater.
-  static const uptr kPopulateSize = 1 << 18;
+  static const uptr kPopulateSize = 1 << 14;
+  // Call mmap for user memory with at least this size.
+  static const uptr kUserMapSize = 1 << 16;
+  // Call mmap for metadata memory with at least this size.
+  static const uptr kMetaMapSize = 1 << 16;
 
   struct RegionInfo {
-    SpinMutex mutex;
-    AllocatorFreeList free_list;
+    BlockingMutex mutex;
+    LFStack<Batch> free_list;
     uptr allocated_user;  // Bytes allocated for user memory.
     uptr allocated_meta;  // Bytes allocated for metadata.
-    char padding[kCacheLineSize - 3 * sizeof(uptr) - sizeof(AllocatorFreeList)];
+    uptr mapped_user;  // Bytes mapped for user memory.
+    uptr mapped_meta;  // Bytes mapped for metadata.
+    uptr n_allocated, n_freed;  // Just stats.
   };
-  COMPILER_CHECK(sizeof(RegionInfo) == kCacheLineSize);
+  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
 
   static uptr AdditionalSize() {
-    uptr PageSize = GetPageSizeCached();
-    uptr res = Max(sizeof(RegionInfo) * kNumClasses, PageSize);
-    CHECK_EQ(res % PageSize, 0);
-    return res;
+    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
+                     GetPageSizeCached());
   }
 
   RegionInfo *GetRegionInfo(uptr class_id) {
@@ -268,47 +481,64 @@
     return offset / (u32)size;
   }
 
-  void PopulateFreeList(uptr class_id, RegionInfo *region) {
-    CHECK(region->free_list.empty());
+  NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
+                                   uptr class_id, RegionInfo *region) {
+    BlockingMutexLock l(&region->mutex);
+    Batch *b = region->free_list.Pop();
+    if (b)
+      return b;
     uptr size = SizeClassMap::Size(class_id);
+    uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
     uptr beg_idx = region->allocated_user;
-    uptr end_idx = beg_idx + kPopulateSize;
+    uptr end_idx = beg_idx + count * size;
     uptr region_beg = kSpaceBeg + kRegionSize * class_id;
-    uptr idx = beg_idx;
-    uptr i = 0;
-    do {  // do-while loop because we need to put at least one item.
-      uptr p = region_beg + idx;
-      region->free_list.push_front(reinterpret_cast<AllocatorListNode*>(p));
-      idx += size;
-      i++;
-    } while (idx < end_idx);
-    region->allocated_user += idx - beg_idx;
-    region->allocated_meta += i * kMetadataSize;
+    if (end_idx + size > region->mapped_user) {
+      // Do the mmap for the user memory.
+      uptr map_size = kUserMapSize;
+      while (end_idx + size > region->mapped_user + map_size)
+        map_size += kUserMapSize;
+      CHECK_GE(region->mapped_user + map_size, end_idx);
+      MapWithCallback(region_beg + region->mapped_user, map_size);
+      stat->Add(AllocatorStatMmapped, map_size);
+      region->mapped_user += map_size;
+    }
+    uptr total_count = (region->mapped_user - beg_idx - size)
+        / size / count * count;
+    region->allocated_meta += total_count * kMetadataSize;
+    if (region->allocated_meta > region->mapped_meta) {
+      uptr map_size = kMetaMapSize;
+      while (region->allocated_meta > region->mapped_meta + map_size)
+        map_size += kMetaMapSize;
+      // Do the mmap for the metadata.
+      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
+      MapWithCallback(region_beg + kRegionSize -
+                      region->mapped_meta - map_size, map_size);
+      region->mapped_meta += map_size;
+    }
+    CHECK_LE(region->allocated_meta, region->mapped_meta);
     if (region->allocated_user + region->allocated_meta > kRegionSize) {
       Printf("Out of memory. Dying.\n");
       Printf("The process has exhausted %zuMB for size class %zu.\n",
           kRegionSize / 1024 / 1024, size);
       Die();
     }
-  }
-
-  void *AllocateBySizeClass(uptr class_id) {
-    CHECK_LT(class_id, kNumClasses);
-    RegionInfo *region = GetRegionInfo(class_id);
-    SpinMutexLock l(&region->mutex);
-    if (region->free_list.empty()) {
-      PopulateFreeList(class_id, region);
+    for (;;) {
+      if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
+        b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
+      else
+        b = (Batch*)(region_beg + beg_idx);
+      b->count = count;
+      for (uptr i = 0; i < count; i++)
+        b->batch[i] = (void*)(region_beg + beg_idx + i * size);
+      region->allocated_user += count * size;
+      CHECK_LE(region->allocated_user, region->mapped_user);
+      beg_idx += count * size;
+      if (beg_idx + count * size + size > region->mapped_user)
+        break;
+      CHECK_GT(b->count, 0);
+      region->free_list.Push(b);
     }
-    CHECK(!region->free_list.empty());
-    AllocatorListNode *node = region->free_list.front();
-    region->free_list.pop_front();
-    return reinterpret_cast<void*>(node);
-  }
-
-  void DeallocateBySizeClass(void *p, uptr class_id) {
-    RegionInfo *region = GetRegionInfo(class_id);
-    SpinMutexLock l(&region->mutex);
-    region->free_list.push_front(reinterpret_cast<AllocatorListNode*>(p));
+    return b;
   }
 };
 
@@ -323,7 +553,7 @@
 //   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
 // Since the regions are aligned by kRegionSize, there are exactly
 // kNumPossibleRegions possible regions in the address space and so we keep
-// an u8 array possible_regions_[kNumPossibleRegions] to store the size classes.
+// a u8 array possible_regions[kNumPossibleRegions] to store the size classes.
 // 0 size class means the region is not used by the allocator.
 //
 // One Region is used to allocate chunks of a single size class.
@@ -333,30 +563,36 @@
 // In order to avoid false sharing the objects of this class should be
 // chache-line aligned.
 template <const uptr kSpaceBeg, const u64 kSpaceSize,
-          const uptr kMetadataSize, class SizeClassMap>
+          const uptr kMetadataSize, class SizeClassMap,
+          class MapUnmapCallback = NoOpMapUnmapCallback>
 class SizeClassAllocator32 {
  public:
-  // Don't need to call Init if the object is a global (i.e. zero-initialized).
+  typedef typename SizeClassMap::TransferBatch Batch;
+  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
+      SizeClassMap, MapUnmapCallback> ThisT;
+  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
+
   void Init() {
-    internal_memset(this, 0, sizeof(*this));
+    state_ = reinterpret_cast<State *>(MapWithCallback(sizeof(State)));
   }
 
-  bool CanAllocate(uptr size, uptr alignment) {
+  void *MapWithCallback(uptr size) {
+    size = RoundUpTo(size, GetPageSizeCached());
+    void *res = MmapOrDie(size, "SizeClassAllocator32");
+    MapUnmapCallback().OnMap((uptr)res, size);
+    return res;
+  }
+
+  void UnmapWithCallback(uptr beg, uptr size) {
+    MapUnmapCallback().OnUnmap(beg, size);
+    UnmapOrDie(reinterpret_cast<void *>(beg), size);
+  }
+
+  static bool CanAllocate(uptr size, uptr alignment) {
     return size <= SizeClassMap::kMaxSize &&
       alignment <= SizeClassMap::kMaxSize;
   }
 
-  void *Allocate(uptr size, uptr alignment) {
-    if (size < alignment) size = alignment;
-    CHECK(CanAllocate(size, alignment));
-    return AllocateBySizeClass(ClassID(size));
-  }
-
-  void Deallocate(void *p) {
-    CHECK(PointerIsMine(p));
-    DeallocateBySizeClass(p, GetSizeClass(p));
-  }
-
   void *GetMetaData(void *p) {
     CHECK(PointerIsMine(p));
     uptr mem = reinterpret_cast<uptr>(p);
@@ -368,28 +604,33 @@
     return reinterpret_cast<void*>(meta);
   }
 
-  // Allocate several chunks of the given class_id.
-  void BulkAllocate(uptr class_id, AllocatorFreeList *free_list) {
+  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
+                                uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
     SizeClassInfo *sci = GetSizeClassInfo(class_id);
     SpinMutexLock l(&sci->mutex);
-    EnsureSizeClassHasAvailableChunks(sci, class_id);
+    if (sci->free_list.empty())
+      PopulateFreeList(stat, c, sci, class_id);
     CHECK(!sci->free_list.empty());
-    BulkMove(SizeClassMap::MaxCached(class_id), &sci->free_list, free_list);
+    Batch *b = sci->free_list.front();
+    sci->free_list.pop_front();
+    return b;
   }
 
-  // Swallow the entire free_list for the given class_id.
-  void BulkDeallocate(uptr class_id, AllocatorFreeList *free_list) {
+  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
+    CHECK_LT(class_id, kNumClasses);
     SizeClassInfo *sci = GetSizeClassInfo(class_id);
     SpinMutexLock l(&sci->mutex);
-    sci->free_list.append_front(free_list);
+    CHECK_GT(b->count, 0);
+    sci->free_list.push_front(b);
   }
 
   bool PointerIsMine(void *p) {
-    return possible_regions_[ComputeRegionId(reinterpret_cast<uptr>(p))] != 0;
+    return GetSizeClass(p) != 0;
   }
 
   uptr GetSizeClass(void *p) {
-    return possible_regions_[ComputeRegionId(reinterpret_cast<uptr>(p))] - 1;
+    return state_->possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
   }
 
   void *GetBlockBegin(void *p) {
@@ -414,30 +655,47 @@
     // No need to lock here.
     uptr res = 0;
     for (uptr i = 0; i < kNumPossibleRegions; i++)
-      if (possible_regions_[i])
+      if (state_->possible_regions[i])
         res += kRegionSize;
     return res;
   }
 
   void TestOnlyUnmap() {
     for (uptr i = 0; i < kNumPossibleRegions; i++)
-      if (possible_regions_[i])
-        UnmapOrDie(reinterpret_cast<void*>(i * kRegionSize), kRegionSize);
+      if (state_->possible_regions[i])
+        UnmapWithCallback((i * kRegionSize), kRegionSize);
+    UnmapWithCallback(reinterpret_cast<uptr>(state_), sizeof(State));
+  }
+
+  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
+  // introspection API.
+  void ForceLock() {
+    for (uptr i = 0; i < kNumClasses; i++) {
+      GetSizeClassInfo(i)->mutex.Lock();
+    }
+  }
+
+  void ForceUnlock() {
+    for (int i = kNumClasses - 1; i >= 0; i--) {
+      GetSizeClassInfo(i)->mutex.Unlock();
+    }
+  }
+
+  void PrintStats() {
   }
 
   typedef SizeClassMap SizeClassMapT;
-  static const uptr kNumClasses = SizeClassMap::kNumClasses;  // 2^k <= 128
+  static const uptr kNumClasses = SizeClassMap::kNumClasses;
 
  private:
   static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
   static const uptr kRegionSize = 1 << kRegionSizeLog;
   static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
-  COMPILER_CHECK(kNumClasses <= 128);
 
   struct SizeClassInfo {
     SpinMutex mutex;
-    AllocatorFreeList free_list;
-    char padding[kCacheLineSize - sizeof(uptr) - sizeof(AllocatorFreeList)];
+    IntrusiveList<Batch> free_list;
+    char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
   };
   COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
 
@@ -451,118 +709,176 @@
     return mem & ~(kRegionSize - 1);
   }
 
-  uptr AllocateRegion(uptr class_id) {
+  uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
     CHECK_LT(class_id, kNumClasses);
     uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
                                       "SizeClassAllocator32"));
+    MapUnmapCallback().OnMap(res, kRegionSize);
+    stat->Add(AllocatorStatMmapped, kRegionSize);
     CHECK_EQ(0U, (res & (kRegionSize - 1)));
-    CHECK_EQ(0U, possible_regions_[ComputeRegionId(res)]);
-    possible_regions_[ComputeRegionId(res)] = class_id + 1;
+    CHECK_EQ(0U, state_->possible_regions[ComputeRegionId(res)]);
+    state_->possible_regions[ComputeRegionId(res)] = class_id;
     return res;
   }
 
   SizeClassInfo *GetSizeClassInfo(uptr class_id) {
     CHECK_LT(class_id, kNumClasses);
-    return &size_class_info_array_[class_id];
+    return &state_->size_class_info_array[class_id];
   }
 
-  void EnsureSizeClassHasAvailableChunks(SizeClassInfo *sci, uptr class_id) {
-    if (!sci->free_list.empty()) return;
+  void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
+                        SizeClassInfo *sci, uptr class_id) {
     uptr size = SizeClassMap::Size(class_id);
-    uptr reg = AllocateRegion(class_id);
+    uptr reg = AllocateRegion(stat, class_id);
     uptr n_chunks = kRegionSize / (size + kMetadataSize);
-    for (uptr i = reg; i < reg + n_chunks * size; i += size)
-      sci->free_list.push_back(reinterpret_cast<AllocatorListNode*>(i));
+    uptr max_count = SizeClassMap::MaxCached(class_id);
+    Batch *b = 0;
+    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
+      if (b == 0) {
+        if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
+          b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
+        else
+          b = (Batch*)i;
+        b->count = 0;
+      }
+      b->batch[b->count++] = (void*)i;
+      if (b->count == max_count) {
+        CHECK_GT(b->count, 0);
+        sci->free_list.push_back(b);
+        b = 0;
+      }
+    }
+    if (b) {
+      CHECK_GT(b->count, 0);
+      sci->free_list.push_back(b);
+    }
   }
 
-  void *AllocateBySizeClass(uptr class_id) {
-    CHECK_LT(class_id, kNumClasses);
-    SizeClassInfo *sci = GetSizeClassInfo(class_id);
-    SpinMutexLock l(&sci->mutex);
-    EnsureSizeClassHasAvailableChunks(sci, class_id);
-    CHECK(!sci->free_list.empty());
-    AllocatorListNode *node = sci->free_list.front();
-    sci->free_list.pop_front();
-    return reinterpret_cast<void*>(node);
-  }
-
-  void DeallocateBySizeClass(void *p, uptr class_id) {
-    CHECK_LT(class_id, kNumClasses);
-    SizeClassInfo *sci = GetSizeClassInfo(class_id);
-    SpinMutexLock l(&sci->mutex);
-    sci->free_list.push_front(reinterpret_cast<AllocatorListNode*>(p));
-  }
-
-  u8 possible_regions_[kNumPossibleRegions];
-  SizeClassInfo size_class_info_array_[kNumClasses];
+  struct State {
+    u8 possible_regions[kNumPossibleRegions];
+    SizeClassInfo size_class_info_array[kNumClasses];
+  };
+  State *state_;
 };
 
-// Objects of this type should be used as local caches for SizeClassAllocator64.
-// Since the typical use of this class is to have one object per thread in TLS,
-// is has to be POD.
+// Objects of this type should be used as local caches for SizeClassAllocator64
+// or SizeClassAllocator32. Since the typical use of this class is to have one
+// object per thread in TLS, it has to be POD.
 template<class SizeClassAllocator>
 struct SizeClassAllocatorLocalCache {
   typedef SizeClassAllocator Allocator;
   static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
-  // Don't need to call Init if the object is a global (i.e. zero-initialized).
-  void Init() {
-    internal_memset(this, 0, sizeof(*this));
+
+  void Init(AllocatorGlobalStats *s) {
+    stats_.Init();
+    if (s)
+      s->Register(&stats_);
+  }
+
+  void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
+    Drain(allocator);
+    if (s)
+      s->Unregister(&stats_);
   }
 
   void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
+    CHECK_NE(class_id, 0UL);
     CHECK_LT(class_id, kNumClasses);
-    AllocatorFreeList *free_list = &free_lists_[class_id];
-    if (free_list->empty())
-      allocator->BulkAllocate(class_id, free_list);
-    CHECK(!free_list->empty());
-    void *res = free_list->front();
-    free_list->pop_front();
+    stats_.Add(AllocatorStatMalloced, SizeClassMap::Size(class_id));
+    PerClass *c = &per_class_[class_id];
+    if (UNLIKELY(c->count == 0))
+      Refill(allocator, class_id);
+    void *res = c->batch[--c->count];
+    PREFETCH(c->batch[c->count - 1]);
     return res;
   }
 
   void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
+    CHECK_NE(class_id, 0UL);
     CHECK_LT(class_id, kNumClasses);
-    AllocatorFreeList *free_list = &free_lists_[class_id];
-    free_list->push_front(reinterpret_cast<AllocatorListNode*>(p));
-    if (free_list->size() >= 2 * SizeClassMap::MaxCached(class_id))
-      DrainHalf(allocator, class_id);
+    // If the first allocator call on a new thread is a deallocation, then
+    // max_count will be zero, leading to check failure.
+    InitCache();
+    stats_.Add(AllocatorStatFreed, SizeClassMap::Size(class_id));
+    PerClass *c = &per_class_[class_id];
+    CHECK_NE(c->max_count, 0UL);
+    if (UNLIKELY(c->count == c->max_count))
+      Drain(allocator, class_id);
+    c->batch[c->count++] = p;
   }
 
   void Drain(SizeClassAllocator *allocator) {
-    for (uptr i = 0; i < kNumClasses; i++) {
-      allocator->BulkDeallocate(i, &free_lists_[i]);
-      CHECK(free_lists_[i].empty());
+    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
+      PerClass *c = &per_class_[class_id];
+      while (c->count > 0)
+        Drain(allocator, class_id);
     }
   }
 
   // private:
   typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
-  AllocatorFreeList free_lists_[kNumClasses];
+  typedef typename SizeClassMap::TransferBatch Batch;
+  struct PerClass {
+    uptr count;
+    uptr max_count;
+    void *batch[2 * SizeClassMap::kMaxNumCached];
+  };
+  PerClass per_class_[kNumClasses];
+  AllocatorStats stats_;
 
-  void DrainHalf(SizeClassAllocator *allocator, uptr class_id) {
-    AllocatorFreeList *free_list = &free_lists_[class_id];
-    AllocatorFreeList half;
-    half.clear();
-    const uptr count = free_list->size() / 2;
-    for (uptr i = 0; i < count; i++) {
-      AllocatorListNode *node = free_list->front();
-      free_list->pop_front();
-      half.push_front(node);
+  void InitCache() {
+    if (per_class_[1].max_count)
+      return;
+    for (uptr i = 0; i < kNumClasses; i++) {
+      PerClass *c = &per_class_[i];
+      c->max_count = 2 * SizeClassMap::MaxCached(i);
     }
-    allocator->BulkDeallocate(class_id, &half);
+  }
+
+  NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
+    InitCache();
+    PerClass *c = &per_class_[class_id];
+    Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
+    CHECK_GT(b->count, 0);
+    for (uptr i = 0; i < b->count; i++)
+      c->batch[i] = b->batch[i];
+    c->count = b->count;
+    if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
+      Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
+  }
+
+  NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
+    InitCache();
+    PerClass *c = &per_class_[class_id];
+    Batch *b;
+    if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
+      b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
+    else
+      b = (Batch*)c->batch[0];
+    uptr cnt = Min(c->max_count / 2, c->count);
+    for (uptr i = 0; i < cnt; i++) {
+      b->batch[i] = c->batch[i];
+      c->batch[i] = c->batch[i + c->max_count / 2];
+    }
+    b->count = cnt;
+    c->count -= cnt;
+    CHECK_GT(b->count, 0);
+    allocator->DeallocateBatch(&stats_, class_id, b);
   }
 };
 
 // This class can (de)allocate only large chunks of memory using mmap/unmap.
 // The main purpose of this allocator is to cover large and rare allocation
 // sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
+template <class MapUnmapCallback = NoOpMapUnmapCallback>
 class LargeMmapAllocator {
  public:
   void Init() {
     internal_memset(this, 0, sizeof(*this));
     page_size_ = GetPageSizeCached();
   }
-  void *Allocate(uptr size, uptr alignment) {
+
+  void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
     CHECK(IsPowerOfTwo(alignment));
     uptr map_size = RoundUpMapSize(size);
     if (alignment > page_size_)
@@ -570,6 +886,7 @@
     if (map_size < size) return 0;  // Overflow.
     uptr map_beg = reinterpret_cast<uptr>(
         MmapOrDie(map_size, "LargeMmapAllocator"));
+    MapUnmapCallback().OnMap(map_beg, map_size);
     uptr map_end = map_beg + map_size;
     uptr res = map_beg + page_size_;
     if (res & (alignment - 1))  // Align.
@@ -580,78 +897,121 @@
     h->size = size;
     h->map_beg = map_beg;
     h->map_size = map_size;
+    uptr size_log = MostSignificantSetBitIndex(map_size);
+    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
     {
       SpinMutexLock l(&mutex_);
-      h->next = list_;
-      h->prev = 0;
-      if (list_)
-        list_->prev = h;
-      list_ = h;
+      uptr idx = n_chunks_++;
+      CHECK_LT(idx, kMaxNumChunks);
+      h->chunk_idx = idx;
+      chunks_[idx] = h;
+      stats.n_allocs++;
+      stats.currently_allocated += map_size;
+      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
+      stats.by_size_log[size_log]++;
+      stat->Add(AllocatorStatMalloced, map_size);
+      stat->Add(AllocatorStatMmapped, map_size);
     }
     return reinterpret_cast<void*>(res);
   }
 
-  void Deallocate(void *p) {
+  void Deallocate(AllocatorStats *stat, void *p) {
     Header *h = GetHeader(p);
     {
       SpinMutexLock l(&mutex_);
-      Header *prev = h->prev;
-      Header *next = h->next;
-      if (prev)
-        prev->next = next;
-      if (next)
-        next->prev = prev;
-      if (h == list_)
-        list_ = next;
+      uptr idx = h->chunk_idx;
+      CHECK_LT(idx, n_chunks_);  // Validate the index before using it.
+      CHECK_EQ(chunks_[idx], h);
+      chunks_[idx] = chunks_[n_chunks_ - 1];  // Move the last chunk into the hole.
+      chunks_[idx]->chunk_idx = idx;
+      n_chunks_--;
+      stats.n_frees++;
+      stats.currently_allocated -= h->map_size;
+      stat->Add(AllocatorStatFreed, h->map_size);
+      stat->Add(AllocatorStatUnmapped, h->map_size);
     }
+    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
     UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
   }
 
   uptr TotalMemoryUsed() {
     SpinMutexLock l(&mutex_);
     uptr res = 0;
-    for (Header *l = list_; l; l = l->next) {
-      res += RoundUpMapSize(l->size);
+    for (uptr i = 0; i < n_chunks_; i++) {
+      Header *h = chunks_[i];
+      CHECK_EQ(h->chunk_idx, i);
+      res += RoundUpMapSize(h->size);
     }
     return res;
   }
 
   bool PointerIsMine(void *p) {
-    // Fast check.
-    if ((reinterpret_cast<uptr>(p) & (page_size_ - 1))) return false;
-    SpinMutexLock l(&mutex_);
-    for (Header *l = list_; l; l = l->next) {
-      if (GetUser(l) == p) return true;
-    }
-    return false;
+    return GetBlockBegin(p) != 0;
   }
 
   uptr GetActuallyAllocatedSize(void *p) {
-    return RoundUpMapSize(GetHeader(p)->size) - page_size_;
+    return RoundUpTo(GetHeader(p)->size, page_size_);
   }
 
   // At least page_size_/2 metadata bytes is available.
   void *GetMetaData(void *p) {
+    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
+    CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
     return GetHeader(p) + 1;
   }
 
-  void *GetBlockBegin(void *p) {
+  void *GetBlockBegin(void *ptr) {
+    uptr p = reinterpret_cast<uptr>(ptr);
     SpinMutexLock l(&mutex_);
-    for (Header *l = list_; l; l = l->next) {
-      void *b = GetUser(l);
-      if (p >= b && p < (u8*)b + l->size)
-        return b;
+    uptr nearest_chunk = 0;
+    // Linear scan for the chunk header with the largest address that is <= p.
+    for (uptr i = 0; i < n_chunks_; i++) {
+      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
+      if (p < ch) continue;  // p is to the left of this chunk, skip it.
+      if (p - ch < p - nearest_chunk)
+        nearest_chunk = ch;
     }
-    return 0;
+    if (!nearest_chunk)
+      return 0;
+    Header *h = reinterpret_cast<Header *>(nearest_chunk);
+    CHECK_GE(nearest_chunk, h->map_beg);
+    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
+    CHECK_LE(nearest_chunk, p);
+    if (h->map_beg + h->map_size <= p)  // p is at or past the mapping end.
+      return 0;
+    return GetUser(h);
+  }
+
+  void PrintStats() {
+    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
+           "remains %zd (%zd K) max %zd M; by size logs: ",
+           stats.n_allocs, stats.n_allocs - stats.n_frees,
+           stats.currently_allocated >> 10, stats.max_allocated >> 20);
+    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
+      uptr c = stats.by_size_log[i];
+      if (!c) continue;
+      Printf("%zd:%zd; ", i, c);
+    }
+    Printf("\n");
+  }
+
+  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
+  // introspection API.
+  void ForceLock() {
+    mutex_.Lock();
+  }
+
+  void ForceUnlock() {
+    mutex_.Unlock();
   }
 
  private:
+  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
   struct Header {
     uptr map_beg;
     uptr map_size;
     uptr size;
-    Header *next;
-    Header *prev;
+    uptr chunk_idx;
   };
 
   Header *GetHeader(uptr p) {
@@ -670,7 +1030,11 @@
   }
 
   uptr page_size_;
-  Header *list_;
+  Header *chunks_[kMaxNumChunks];
+  uptr n_chunks_;
+  struct Stats {
+    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
+  } stats;
   SpinMutex mutex_;
 };
 
@@ -687,6 +1051,7 @@
   void Init() {
     primary_.Init();
     secondary_.Init();
+    stats_.Init();
   }
 
   void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
@@ -702,7 +1067,7 @@
     if (primary_.CanAllocate(size, alignment))
       res = cache->Allocate(&primary_, primary_.ClassID(size));
     else
-      res = secondary_.Allocate(size, alignment);
+      res = secondary_.Allocate(&stats_, size, alignment);
     if (alignment > 8)
       CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
     if (cleared && res)
@@ -715,7 +1080,7 @@
     if (primary_.PointerIsMine(p))
       cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
     else
-      secondary_.Deallocate(p);
+      secondary_.Deallocate(&stats_, p);
   }
 
   void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
@@ -742,6 +1107,10 @@
     return secondary_.PointerIsMine(p);
   }
 
+  bool FromPrimary(void *p) {
+    return primary_.PointerIsMine(p);
+  }
+
   void *GetMetaData(void *p) {
     if (primary_.PointerIsMine(p))
       return primary_.GetMetaData(p);
@@ -766,15 +1135,48 @@
 
   void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }
 
+  void InitCache(AllocatorCache *cache) {
+    cache->Init(&stats_);
+  }
+
+  void DestroyCache(AllocatorCache *cache) {
+    cache->Destroy(&primary_, &stats_);
+  }
+
   void SwallowCache(AllocatorCache *cache) {
     cache->Drain(&primary_);
   }
 
+  void GetStats(AllocatorStatCounters s) const {
+    stats_.Get(s);
+  }
+
+  void PrintStats() {
+    primary_.PrintStats();
+    secondary_.PrintStats();
+  }
+
+  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
+  // introspection API.
+  void ForceLock() {
+    primary_.ForceLock();
+    secondary_.ForceLock();
+  }
+
+  void ForceUnlock() {
+    secondary_.ForceUnlock();
+    primary_.ForceUnlock();
+  }
+
  private:
   PrimaryAllocator primary_;
   SecondaryAllocator secondary_;
+  AllocatorGlobalStats stats_;
 };
 
+// Returns true if calloc(size, n) should return 0 due to overflow in size*n.
+bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_ALLOCATOR_H
diff --git a/lib/sanitizer_common/sanitizer_atomic_clang.h b/lib/sanitizer_common/sanitizer_atomic_clang.h
index af70441..7f73df3 100644
--- a/lib/sanitizer_common/sanitizer_atomic_clang.h
+++ b/lib/sanitizer_common/sanitizer_atomic_clang.h
@@ -41,6 +41,7 @@
       | memory_order_acquire | memory_order_seq_cst));
   DCHECK(!((uptr)a % sizeof(*a)));
   typename T::Type v;
+  // FIXME(dvyukov): 64-bit load is not atomic on 32-bits.
   if (mo == memory_order_relaxed) {
     v = a->val_dont_use;
   } else {
@@ -56,6 +57,7 @@
   DCHECK(mo & (memory_order_relaxed | memory_order_release
       | memory_order_seq_cst));
   DCHECK(!((uptr)a % sizeof(*a)));
+  // FIXME(dvyukov): 64-bit store is not atomic on 32-bits.
   if (mo == memory_order_relaxed) {
     a->val_dont_use = v;
   } else {
diff --git a/lib/sanitizer_common/sanitizer_atomic_msvc.h b/lib/sanitizer_common/sanitizer_atomic_msvc.h
index d7d5929..58a6a20 100644
--- a/lib/sanitizer_common/sanitizer_atomic_msvc.h
+++ b/lib/sanitizer_common/sanitizer_atomic_msvc.h
@@ -24,9 +24,31 @@
 extern "C" long _InterlockedExchangeAdd(  // NOLINT
     long volatile * Addend, long Value);  // NOLINT
 #pragma intrinsic(_InterlockedExchangeAdd)
-extern "C" void *InterlockedCompareExchangePointer(
+
+#ifdef _WIN64
+extern "C" void *_InterlockedCompareExchangePointer(
     void *volatile *Destination,
     void *Exchange, void *Comparand);
+#pragma intrinsic(_InterlockedCompareExchangePointer)
+#else
+// There's no _InterlockedCompareExchangePointer intrinsic on x86,
+// so call _InterlockedCompareExchange instead.
+extern "C"
+long __cdecl _InterlockedCompareExchange(  // NOLINT
+    long volatile *Destination,            // NOLINT
+    long Exchange, long Comparand);        // NOLINT
+#pragma intrinsic(_InterlockedCompareExchange)
+
+inline static void *_InterlockedCompareExchangePointer(
+    void *volatile *Destination,
+    void *Exchange, void *Comparand) {
+  return reinterpret_cast<void*>(
+      _InterlockedCompareExchange(
+          reinterpret_cast<long volatile*>(Destination),  // NOLINT
+          reinterpret_cast<long>(Exchange),               // NOLINT
+          reinterpret_cast<long>(Comparand)));            // NOLINT
+}
+#endif
 
 namespace __sanitizer {
 
@@ -50,6 +72,7 @@
       | memory_order_acquire | memory_order_seq_cst));
   DCHECK(!((uptr)a % sizeof(*a)));
   typename T::Type v;
+  // FIXME(dvyukov): 64-bit load is not atomic on 32-bits.
   if (mo == memory_order_relaxed) {
     v = a->val_dont_use;
   } else {
@@ -65,6 +88,7 @@
   DCHECK(mo & (memory_order_relaxed | memory_order_release
       | memory_order_seq_cst));
   DCHECK(!((uptr)a % sizeof(*a)));
+  // FIXME(dvyukov): 64-bit store is not atomic on 32-bits.
   if (mo == memory_order_relaxed) {
     a->val_dont_use = v;
   } else {
@@ -115,7 +139,7 @@
                                            uptr xchg,
                                            memory_order mo) {
   uptr cmpv = *cmp;
-  uptr prev = (uptr)InterlockedCompareExchangePointer(
+  uptr prev = (uptr)_InterlockedCompareExchangePointer(
       (void*volatile*)&a->val_dont_use, (void*)xchg, (void*)cmpv);
   if (prev == cmpv)
     return true;
diff --git a/lib/sanitizer_common/sanitizer_common.cc b/lib/sanitizer_common/sanitizer_common.cc
index ca1f6bd..0518f41 100644
--- a/lib/sanitizer_common/sanitizer_common.cc
+++ b/lib/sanitizer_common/sanitizer_common.cc
@@ -16,6 +16,8 @@
 
 namespace __sanitizer {
 
+const char *SanitizerToolName = "SanitizerTool";
+
 uptr GetPageSizeCached() {
   static uptr PageSize;
   if (!PageSize)
@@ -23,10 +25,16 @@
   return PageSize;
 }
 
-// By default, dump to stderr. If report_fd is kInvalidFd, try to obtain file
-// descriptor by opening file in report_path.
+static bool log_to_file = false;  // Set to true by __sanitizer_set_report_path
+
+// By default, dump to stderr. If |log_to_file| is true and |report_fd_pid|
+// isn't equal to the current PID, try to obtain file descriptor by opening
+// file "report_path_prefix.<PID>".
 static fd_t report_fd = kStderrFd;
-static char report_path[4096];  // Set via __sanitizer_set_report_path.
+static char report_path_prefix[4096];  // Set via __sanitizer_set_report_path.
+// PID of process that opened |report_fd|. If a fork() occurs, the PID of the
+// child process will be different from |report_fd_pid|.
+static int report_fd_pid = 0;
 
 static void (*DieCallback)(void);
 void SetDieCallback(void (*callback)(void)) {
@@ -37,7 +45,7 @@
   if (DieCallback) {
     DieCallback();
   }
-  Exit(1);
+  internal__exit(1);
 }
 
 static CheckFailedCallbackType CheckFailedCallback;
@@ -50,21 +58,29 @@
   if (CheckFailedCallback) {
     CheckFailedCallback(file, line, cond, v1, v2);
   }
-  Report("Sanitizer CHECK failed: %s:%d %s (%zd, %zd)\n", file, line, cond,
-                                                          v1, v2);
+  Report("Sanitizer CHECK failed: %s:%d %s (%lld, %lld)\n", file, line, cond,
+                                                            v1, v2);
   Die();
 }
 
 static void MaybeOpenReportFile() {
-  if (report_fd != kInvalidFd)
-    return;
-  fd_t fd = internal_open(report_path, true);
+  if (!log_to_file || (report_fd_pid == GetPid())) return;
+  InternalScopedBuffer<char> report_path_full(4096);
+  internal_snprintf(report_path_full.data(), report_path_full.size(),
+                    "%s.%d", report_path_prefix, GetPid());
+  fd_t fd = OpenFile(report_path_full.data(), true);
   if (fd == kInvalidFd) {
     report_fd = kStderrFd;
-    Report("ERROR: Can't open file: %s\n", report_path);
+    log_to_file = false;
+    Report("ERROR: Can't open file: %s\n", report_path_full.data());
     Die();
   }
+  if (report_fd != kInvalidFd) {
+    // We're in the child. Close the parent's log.
+    internal_close(report_fd);
+  }
   report_fd = fd;
+  report_fd_pid = GetPid();
 }
 
 bool PrintsToTty() {
@@ -91,7 +107,7 @@
   *buff_size = 0;
   // The files we usually open are not seekable, so try different buffer sizes.
   for (uptr size = kMinFileLen; size <= max_len; size *= 2) {
-    fd_t fd = internal_open(file_name, /*write*/ false);
+    fd_t fd = OpenFile(file_name, /*write*/ false);
     if (fd == kInvalidFd) return 0;
     UnmapOrDie(*buff, *buff_size);
     *buff = (char*)MmapOrDie(size, __FUNCTION__);
@@ -176,6 +192,16 @@
   return (void*)res;
 }
 
+void ReportErrorSummary(const char *error_type, const char *file,
+                        int line, const char *function) {
+  const int kMaxSize = 1024;  // We don't want a summary too long.
+  InternalScopedBuffer<char> buff(kMaxSize);
+  internal_snprintf(buff.data(), kMaxSize, "%s: %s %s:%d %s",
+                    SanitizerToolName, error_type,
+                    file ? file : "??", line, function ? function : "??");
+  __sanitizer_report_error_summary(buff.data());
+}
+
 }  // namespace __sanitizer
 
 using namespace __sanitizer;  // NOLINT
@@ -184,14 +210,16 @@
 void __sanitizer_set_report_path(const char *path) {
   if (!path) return;
   uptr len = internal_strlen(path);
-  if (len > sizeof(report_path) - 100) {
+  if (len > sizeof(report_path_prefix) - 100) {
     Report("ERROR: Path is too long: %c%c%c%c%c%c%c%c...\n",
            path[0], path[1], path[2], path[3],
            path[4], path[5], path[6], path[7]);
     Die();
   }
-  internal_snprintf(report_path, sizeof(report_path), "%s.%d", path, GetPid());
+  internal_strncpy(report_path_prefix, path, sizeof(report_path_prefix));
+  report_path_prefix[len] = '\0';
   report_fd = kInvalidFd;
+  log_to_file = true;
 }
 
 void __sanitizer_set_report_fd(int fd) {
@@ -207,4 +235,7 @@
   PrepareForSandboxing();
 }
 
+void __sanitizer_report_error_summary(const char *error_summary) {
+  Printf("SUMMARY: %s\n", error_summary);
+}
 }  // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_common.h b/lib/sanitizer_common/sanitizer_common.h
index 77fcc5c..021da70 100644
--- a/lib/sanitizer_common/sanitizer_common.h
+++ b/lib/sanitizer_common/sanitizer_common.h
@@ -17,8 +17,10 @@
 #define SANITIZER_COMMON_H
 
 #include "sanitizer_internal_defs.h"
+#include "sanitizer_libc.h"
 
 namespace __sanitizer {
+struct StackTrace;
 
 // Constants.
 const uptr kWordSize = SANITIZER_WORDSIZE / 8;
@@ -30,6 +32,8 @@
 const uptr kCacheLineSize = 64;
 #endif
 
+extern const char *SanitizerToolName;  // Can be changed by the tool.
+
 uptr GetPageSize();
 uptr GetPageSizeCached();
 uptr GetMmapGranularity();
@@ -44,11 +48,13 @@
 void *MmapOrDie(uptr size, const char *mem_type);
 void UnmapOrDie(void *addr, uptr size);
 void *MmapFixedNoReserve(uptr fixed_addr, uptr size);
+void *MmapFixedOrDie(uptr fixed_addr, uptr size);
 void *Mprotect(uptr fixed_addr, uptr size);
 // Map aligned chunk of address space; size and alignment are powers of two.
 void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type);
 // Used to check if we can map shadow memory to a fixed location.
 bool MemoryRangeIsAvailable(uptr range_start, uptr range_end);
+void FlushUnneededShadowMemory(uptr addr, uptr size);
 
 // Internal allocator
 void *InternalAlloc(uptr size);
@@ -103,6 +109,7 @@
 void Report(const char *format, ...);
 void SetPrintfAndReportCallback(void (*callback)(const char *));
 
+fd_t OpenFile(const char *filename, bool write);
 // Opens the file 'file_name" and reads up to 'max_len' bytes.
 // The resulting buffer is mmaped and stored in '*buff'.
 // The size of the mmaped region is stored in '*buff_size',
@@ -120,6 +127,7 @@
 bool FileExists(const char *filename);
 const char *GetEnv(const char *name);
 const char *GetPwd();
+u32 GetUid();
 void ReExec();
 bool StackSizeIsUnlimited();
 void SetStackSizeLimitInBytes(uptr limit);
@@ -133,7 +141,6 @@
 
 // Exit
 void NORETURN Abort();
-void NORETURN Exit(int exitcode);
 void NORETURN Die();
 void NORETURN SANITIZER_INTERFACE_ATTRIBUTE
 CheckFailed(const char *file, int line, const char *cond, u64 v1, u64 v2);
@@ -152,14 +159,79 @@
                                        u64, u64);
 void SetCheckFailedCallback(CheckFailedCallbackType callback);
 
+// Construct a one-line string like
+//  SanitizerToolName: error_type file:line function
+// and call __sanitizer_report_error_summary on it.
+void ReportErrorSummary(const char *error_type, const char *file,
+                        int line, const char *function);
+
 // Math
+#if defined(_WIN32) && !defined(__clang__)
+extern "C" {
+unsigned char _BitScanForward(unsigned long *index, unsigned long mask);  // NOLINT
+unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);  // NOLINT
+#if defined(_WIN64)
+unsigned char _BitScanForward64(unsigned long *index, unsigned __int64 mask);  // NOLINT
+unsigned char _BitScanReverse64(unsigned long *index, unsigned __int64 mask);  // NOLINT
+#endif
+}
+#endif
+
+INLINE uptr MostSignificantSetBitIndex(uptr x) {
+  CHECK_NE(x, 0U);
+  unsigned long up;  // NOLINT
+#if !defined(_WIN32) || defined(__clang__)
+  up = SANITIZER_WORDSIZE - 1 - __builtin_clzl(x);
+#elif defined(_WIN64)
+  _BitScanReverse64(&up, x);
+#else
+  _BitScanReverse(&up, x);
+#endif
+  return up;
+}
+
 INLINE bool IsPowerOfTwo(uptr x) {
   return (x & (x - 1)) == 0;
 }
+
+INLINE uptr RoundUpToPowerOfTwo(uptr size) {
+  CHECK(size);
+  if (IsPowerOfTwo(size)) return size;
+  // Otherwise the result is the power of two just above the top set bit.
+  uptr up = MostSignificantSetBitIndex(size);
+  CHECK(size < (1ULL << (up + 1)));
+  CHECK(size > (1ULL << up));
+  return 1ULL << (up + 1);  // 1UL is only 32 bits wide on Win64.
+}
+
 INLINE uptr RoundUpTo(uptr size, uptr boundary) {
   CHECK(IsPowerOfTwo(boundary));
   return (size + boundary - 1) & ~(boundary - 1);
 }
+
+INLINE uptr RoundDownTo(uptr x, uptr boundary) {
+  return x & ~(boundary - 1);
+}
+
+INLINE bool IsAligned(uptr a, uptr alignment) {
+  return (a & (alignment - 1)) == 0;
+}
+
+INLINE uptr Log2(uptr x) {
+  CHECK(IsPowerOfTwo(x));
+#if !defined(_WIN32) || defined(__clang__)
+  return __builtin_ctzl(x);
+#elif defined(_WIN64)
+  unsigned long ret;  // NOLINT
+  _BitScanForward64(&ret, x);
+  return ret;
+#else
+  unsigned long ret;  // NOLINT
+  _BitScanForward(&ret, x);
+  return ret;
+#endif
+}
+
 // Don't use std::min, std::max or std::swap, to minimize dependency
 // on libstdc++.
 template<class T> T Min(T a, T b) { return a < b ? a : b; }
@@ -188,6 +260,65 @@
 # define FIRST_32_SECOND_64(a, b) (a)
 #endif
 
+// A low-level vector based on mmap. May incur a significant memory overhead for
+// small vectors.
+// WARNING: The current implementation supports only POD types.
+template<typename T>
+class InternalVector {
+ public:
+  explicit InternalVector(uptr initial_capacity) {
+    CHECK_GT(initial_capacity, 0);
+    capacity_ = initial_capacity;
+    size_ = 0;
+    data_ = (T *)MmapOrDie(capacity_ * sizeof(T), "InternalVector");
+  }
+  ~InternalVector() {
+    UnmapOrDie(data_, capacity_ * sizeof(T));
+  }
+  T &operator[](uptr i) {
+    CHECK_LT(i, size_);
+    return data_[i];
+  }
+  void push_back(const T &element) {
+    CHECK_LE(size_, capacity_);
+    if (size_ == capacity_) {
+      uptr new_capacity = RoundUpToPowerOfTwo(size_ + 1);
+      Resize(new_capacity);
+    }
+    data_[size_++] = element;
+  }
+  T &back() {
+    CHECK_GT(size_, 0);
+    return data_[size_ - 1];
+  }
+  void pop_back() {
+    CHECK_GT(size_, 0);
+    size_--;
+  }
+  uptr size() {
+    return size_;
+  }
+
+ private:
+  void Resize(uptr new_capacity) {
+    CHECK_GT(new_capacity, 0);
+    CHECK_LE(size_, new_capacity);
+    T *new_data = (T *)MmapOrDie(new_capacity * sizeof(T),
+                                 "InternalVector");
+    internal_memcpy(new_data, data_, size_ * sizeof(T));
+    T *old_data = data_;
+    data_ = new_data;
+    UnmapOrDie(old_data, capacity_ * sizeof(T));
+    capacity_ = new_capacity;
+  }
+  // Disallow evil constructors.
+  InternalVector(const InternalVector&);
+  void operator=(const InternalVector&);
+
+  T *data_;
+  uptr capacity_;
+  uptr size_;
+};
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_COMMON_H
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc
new file mode 100644
index 0000000..0478262
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -0,0 +1,339 @@
+//===-- sanitizer_common_interceptors.inc -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common function interceptors for tools like AddressSanitizer,
+// ThreadSanitizer, MemorySanitizer, etc.
+//
+// This file should be included into the tool's interceptor file,
+// which has to define its own macros:
+//   COMMON_INTERCEPTOR_ENTER
+//   COMMON_INTERCEPTOR_READ_RANGE
+//   COMMON_INTERCEPTOR_WRITE_RANGE
+//   COMMON_INTERCEPTOR_FD_ACQUIRE
+//   COMMON_INTERCEPTOR_FD_RELEASE
+//   COMMON_INTERCEPTOR_SET_THREAD_NAME
+//===----------------------------------------------------------------------===//
+#include "interception/interception.h"
+#include "sanitizer_platform_interceptors.h"
+
+#include <stdarg.h>
+
+#ifdef _WIN32
+#define va_copy(dst, src) ((dst) = (src))
+#endif // _WIN32
+
+#if SANITIZER_INTERCEPT_READ
+INTERCEPTOR(SSIZE_T, read, int fd, void *ptr, SIZE_T count) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, read, fd, ptr, count);
+  SSIZE_T res = REAL(read)(fd, ptr, count);
+  if (res > 0)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
+  if (res >= 0 && fd >= 0)
+    COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
+  return res;
+}
+#define INIT_READ INTERCEPT_FUNCTION(read)
+#else
+#define INIT_READ
+#endif
+
+#if SANITIZER_INTERCEPT_PREAD
+INTERCEPTOR(SSIZE_T, pread, int fd, void *ptr, SIZE_T count, OFF_T offset) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, pread, fd, ptr, count, offset);
+  SSIZE_T res = REAL(pread)(fd, ptr, count, offset);
+  if (res > 0)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
+  if (res >= 0 && fd >= 0)
+    COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
+  return res;
+}
+#define INIT_PREAD INTERCEPT_FUNCTION(pread)
+#else
+#define INIT_PREAD
+#endif
+
+#if SANITIZER_INTERCEPT_PREAD64
+INTERCEPTOR(SSIZE_T, pread64, int fd, void *ptr, SIZE_T count, OFF64_T offset) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, pread64, fd, ptr, count, offset);
+  SSIZE_T res = REAL(pread64)(fd, ptr, count, offset);
+  if (res > 0)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
+  if (res >= 0 && fd >= 0)
+    COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
+  return res;
+}
+#define INIT_PREAD64 INTERCEPT_FUNCTION(pread64)
+#else
+#define INIT_PREAD64
+#endif
+
+#if SANITIZER_INTERCEPT_WRITE
+INTERCEPTOR(SSIZE_T, write, int fd, void *ptr, SIZE_T count) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, write, fd, ptr, count);
+  if (fd >= 0)
+    COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
+  SSIZE_T res = REAL(write)(fd, ptr, count);
+  if (res > 0)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
+  return res;
+}
+#define INIT_WRITE INTERCEPT_FUNCTION(write)
+#else
+#define INIT_WRITE
+#endif
+
+#if SANITIZER_INTERCEPT_PWRITE
+INTERCEPTOR(SSIZE_T, pwrite, int fd, void *ptr, SIZE_T count, OFF_T offset) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, pwrite, fd, ptr, count, offset);
+  if (fd >= 0)
+    COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
+  SSIZE_T res = REAL(pwrite)(fd, ptr, count, offset);
+  if (res > 0)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
+  return res;
+}
+#define INIT_PWRITE INTERCEPT_FUNCTION(pwrite)
+#else
+#define INIT_PWRITE
+#endif
+
+#if SANITIZER_INTERCEPT_PWRITE64
+INTERCEPTOR(SSIZE_T, pwrite64, int fd, void *ptr, SIZE_T count,
+            OFF64_T offset) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, pwrite64, fd, ptr, count, offset);
+  if (fd >= 0)
+    COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
+  SSIZE_T res = REAL(pwrite64)(fd, ptr, count, offset);
+  if (res > 0)  // Only the bytes actually written were read from ptr.
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
+  return res;
+}
+#define INIT_PWRITE64 INTERCEPT_FUNCTION(pwrite64)
+#else
+#define INIT_PWRITE64
+#endif
+
+#if SANITIZER_INTERCEPT_PRCTL
+INTERCEPTOR(int, prctl, int option,
+            unsigned long arg2, unsigned long arg3,   // NOLINT
+            unsigned long arg4, unsigned long arg5) { // NOLINT
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, prctl, option, arg2, arg3, arg4, arg5);
+  static const int PR_SET_NAME = 15;
+  int res = REAL(prctl)(option, arg2, arg3, arg4, arg5);
+  if (option == PR_SET_NAME) {
+    char buff[16];
+    internal_strncpy(buff, (char *)arg2, 15);
+    buff[15] = 0;  // At most 15 bytes were copied; force termination.
+    COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, buff);
+  }
+  return res;
+}
+#define INIT_PRCTL INTERCEPT_FUNCTION(prctl)
+#else
+#define INIT_PRCTL
+#endif // SANITIZER_INTERCEPT_PRCTL
+
+#if SANITIZER_INTERCEPT_LOCALTIME_AND_FRIENDS
+INTERCEPTOR(void *, localtime, unsigned long *timep) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, localtime, timep);
+  void *res = REAL(localtime)(timep);
+  if (res) {
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, struct_tm_sz);
+  }
+  return res;
+}
+INTERCEPTOR(void *, localtime_r, unsigned long *timep, void *result) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, localtime_r, timep, result);
+  void *res = REAL(localtime_r)(timep, result);
+  if (res) {
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, struct_tm_sz);
+  }
+  return res;
+}
+INTERCEPTOR(void *, gmtime, unsigned long *timep) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, gmtime, timep);
+  void *res = REAL(gmtime)(timep);
+  if (res) {
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, struct_tm_sz);
+  }
+  return res;
+}
+INTERCEPTOR(void *, gmtime_r, unsigned long *timep, void *result) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, gmtime_r, timep, result);
+  void *res = REAL(gmtime_r)(timep, result);
+  if (res) {
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, struct_tm_sz);
+  }
+  return res;
+}
+INTERCEPTOR(char *, ctime, unsigned long *timep) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, ctime, timep);
+  char *res = REAL(ctime)(timep);
+  if (res) {
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+  }
+  return res;
+}
+INTERCEPTOR(char *, ctime_r, unsigned long *timep, char *result) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, ctime_r, timep, result);
+  char *res = REAL(ctime_r)(timep, result);
+  if (res) {
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+  }
+  return res;
+}
+INTERCEPTOR(char *, asctime, void *tm) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, asctime, tm);
+  char *res = REAL(asctime)(tm);
+  if (res) {
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, struct_tm_sz);
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+  }
+  return res;
+}
+INTERCEPTOR(char *, asctime_r, void *tm, char *result) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, asctime_r, tm, result);
+  char *res = REAL(asctime_r)(tm, result);
+  if (res) {
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, struct_tm_sz);
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+  }
+  return res;
+}
+#define INIT_LOCALTIME_AND_FRIENDS               \
+  INTERCEPT_FUNCTION(localtime);                 \
+  INTERCEPT_FUNCTION(localtime_r);               \
+  INTERCEPT_FUNCTION(gmtime);                    \
+  INTERCEPT_FUNCTION(gmtime_r);                  \
+  INTERCEPT_FUNCTION(ctime);                     \
+  INTERCEPT_FUNCTION(ctime_r);                   \
+  INTERCEPT_FUNCTION(asctime);                   \
+  INTERCEPT_FUNCTION(asctime_r);
+#else
+#define INIT_LOCALTIME_AND_FRIENDS
+#endif // SANITIZER_INTERCEPT_LOCALTIME_AND_FRIENDS
+
+#if SANITIZER_INTERCEPT_SCANF
+
+#include "sanitizer_common_interceptors_scanf.inc"
+
+#define VSCANF_INTERCEPTOR_IMPL(vname, allowGnuMalloc, ...)                    \
+  {                                                                            \
+    void *ctx;                                                                 \
+    COMMON_INTERCEPTOR_ENTER(ctx, vname, __VA_ARGS__);                         \
+    va_list aq;                                                                \
+    va_copy(aq, ap);                                                           \
+    int res = REAL(vname)(__VA_ARGS__);                                        \
+    if (res > 0)                                                               \
+      scanf_common(ctx, res, allowGnuMalloc, format, aq);                      \
+    va_end(aq);                                                                \
+    return res;                                                                \
+  }
+
+INTERCEPTOR(int, vscanf, const char *format, va_list ap)
+VSCANF_INTERCEPTOR_IMPL(vscanf, true, format, ap)
+
+INTERCEPTOR(int, vsscanf, const char *str, const char *format, va_list ap)
+VSCANF_INTERCEPTOR_IMPL(vsscanf, true, str, format, ap)
+
+INTERCEPTOR(int, vfscanf, void *stream, const char *format, va_list ap)
+VSCANF_INTERCEPTOR_IMPL(vfscanf, true, stream, format, ap)
+
+#if SANITIZER_INTERCEPT_ISOC99_SCANF
+INTERCEPTOR(int, __isoc99_vscanf, const char *format, va_list ap)
+VSCANF_INTERCEPTOR_IMPL(__isoc99_vscanf, false, format, ap)
+
+INTERCEPTOR(int, __isoc99_vsscanf, const char *str, const char *format,
+            va_list ap)
+VSCANF_INTERCEPTOR_IMPL(__isoc99_vsscanf, false, str, format, ap)
+
+INTERCEPTOR(int, __isoc99_vfscanf, void *stream, const char *format, va_list ap)
+VSCANF_INTERCEPTOR_IMPL(__isoc99_vfscanf, false, stream, format, ap)
+#endif  // SANITIZER_INTERCEPT_ISOC99_SCANF
+
+#define SCANF_INTERCEPTOR_IMPL(name, vname, ...)                               \
+  {                                                                            \
+    void *ctx;                                                                 \
+    COMMON_INTERCEPTOR_ENTER(ctx, name, __VA_ARGS__);                          \
+    va_list ap;                                                                \
+    va_start(ap, format);                                                      \
+    int res = vname(__VA_ARGS__, ap);                                          \
+    va_end(ap);                                                                \
+    return res;                                                                \
+  }
+
+INTERCEPTOR(int, scanf, const char *format, ...)
+SCANF_INTERCEPTOR_IMPL(scanf, vscanf, format)
+
+INTERCEPTOR(int, fscanf, void *stream, const char *format, ...)
+SCANF_INTERCEPTOR_IMPL(fscanf, vfscanf, stream, format)
+
+INTERCEPTOR(int, sscanf, const char *str, const char *format, ...)
+SCANF_INTERCEPTOR_IMPL(sscanf, vsscanf, str, format)
+
+#if SANITIZER_INTERCEPT_ISOC99_SCANF
+INTERCEPTOR(int, __isoc99_scanf, const char *format, ...)
+SCANF_INTERCEPTOR_IMPL(__isoc99_scanf, __isoc99_vscanf, format)
+
+INTERCEPTOR(int, __isoc99_fscanf, void *stream, const char *format, ...)
+SCANF_INTERCEPTOR_IMPL(__isoc99_fscanf, __isoc99_vfscanf, stream, format)
+
+INTERCEPTOR(int, __isoc99_sscanf, const char *str, const char *format, ...)
+SCANF_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format)
+#endif
+
+#define INIT_SCANF                                                             \
+  INTERCEPT_FUNCTION(scanf);                                                   \
+  INTERCEPT_FUNCTION(sscanf);                                                  \
+  INTERCEPT_FUNCTION(fscanf);                                                  \
+  INTERCEPT_FUNCTION(vscanf);                                                  \
+  INTERCEPT_FUNCTION(vsscanf);                                                 \
+  INTERCEPT_FUNCTION(vfscanf);                                                 \
+  INTERCEPT_FUNCTION(__isoc99_scanf);                                          \
+  INTERCEPT_FUNCTION(__isoc99_sscanf);                                         \
+  INTERCEPT_FUNCTION(__isoc99_fscanf);                                         \
+  INTERCEPT_FUNCTION(__isoc99_vscanf);                                         \
+  INTERCEPT_FUNCTION(__isoc99_vsscanf);                                        \
+  INTERCEPT_FUNCTION(__isoc99_vfscanf);
+
+#else
+#define INIT_SCANF
+#endif
+
+#define SANITIZER_COMMON_INTERCEPTORS_INIT                                     \
+  INIT_READ;                                                                   \
+  INIT_PREAD;                                                                  \
+  INIT_PREAD64;                                                                \
+  INIT_PRCTL;                                                                  \
+  INIT_WRITE;                                                                  \
+  INIT_PWRITE;                                                                 \
+  INIT_PWRITE64;                                                               \
+  INIT_LOCALTIME_AND_FRIENDS;                                                  \
+  INIT_SCANF;
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc b/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc
new file mode 100644
index 0000000..8bb5cd8
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_common_interceptors_scanf.inc
@@ -0,0 +1,309 @@
+//===-- sanitizer_common_interceptors_scanf.inc -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Scanf implementation for use in *Sanitizer interceptors.
+// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
+// with a few common GNU extensions.
+//
+//===----------------------------------------------------------------------===//
+#include <stdarg.h>
+
+struct ScanfDirective {
+  int argIdx; // argument index, or -1 if not specified ("%n$")
+  int fieldWidth; // field width; 0 if not specified
+  bool suppressed; // suppress assignment ("*")
+  bool allocate;   // allocate space ("m")
+  char lengthModifier[2]; // "h", "hh", "l", "ll" or one of "jztLq"; 0 if none
+  char convSpecifier; // conversion character following the modifiers
+  bool maybeGnuMalloc; // "%a" followed by s, S or [ (may be GNU "%as")
+};
+
+static const char *parse_number(const char *p, int *out) {
+  *out = internal_atoll(p);
+  while (*p >= '0' && *p <= '9')
+    ++p;
+  return p;
+}
+
+static bool char_is_one_of(char c, const char *s) {
+  return !!internal_strchr(s, c);
+}
+
+// Parse scanf format string. If a valid directive is encountered, it is
+// returned in dir. This function returns the pointer to the first
+// unprocessed character, or 0 in case of error.
+// In case of the end-of-string, a pointer to the closing \0 is returned.
+static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
+                                    ScanfDirective *dir) {
+  internal_memset(dir, 0, sizeof(*dir));
+  dir->argIdx = -1;
+
+  while (*p) {
+    if (*p != '%') {
+      ++p;
+      continue;
+    }
+    ++p;
+    // %%
+    if (*p == '%') {
+      ++p;
+      continue;
+    }
+    if (*p == '\0') {
+      return 0;
+    }
+    // %n$
+    if (*p >= '0' && *p <= '9') {
+      int number;
+      const char *q = parse_number(p, &number);
+      if (*q == '$') {
+        dir->argIdx = number;
+        p = q + 1;
+      }
+      // Otherwise, do not change p. This will be re-parsed later as the field
+      // width.
+    }
+    // *
+    if (*p == '*') {
+      dir->suppressed = true;
+      ++p;
+    }
+    // Field width.
+    if (*p >= '0' && *p <= '9') {
+      p = parse_number(p, &dir->fieldWidth);
+      if (dir->fieldWidth <= 0)
+        return 0;
+    }
+    // m
+    if (*p == 'm') {
+      dir->allocate = true;
+      ++p;
+    }
+    // Length modifier.
+    if (char_is_one_of(*p, "jztLq")) {
+      dir->lengthModifier[0] = *p;
+      ++p;
+    } else if (*p == 'h') {
+      dir->lengthModifier[0] = 'h';
+      ++p;
+      if (*p == 'h') {
+        dir->lengthModifier[1] = 'h';
+        ++p;
+      }
+    } else if (*p == 'l') {
+      dir->lengthModifier[0] = 'l';
+      ++p;
+      if (*p == 'l') {
+        dir->lengthModifier[1] = 'l';
+        ++p;
+      }
+    }
+    // Conversion specifier.
+    dir->convSpecifier = *p++;
+    // Consume %[...] expression.
+    if (dir->convSpecifier == '[') {
+      if (*p == '^')
+        ++p;
+      if (*p == ']')
+        ++p;
+      while (*p && *p != ']')
+        ++p;
+      if (*p == 0)
+        return 0; // unexpected end of string
+                  // Consume the closing ']'.
+      ++p;
+    }
+    // This is unfortunately ambiguous between old GNU extension
+    // of %as, %aS and %a[...] and newer POSIX %a followed by
+    // letters s, S or [.
+    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
+        !dir->lengthModifier[0]) {
+      if (*p == 's' || *p == 'S') {
+        dir->maybeGnuMalloc = true;
+        ++p;
+      } else if (*p == '[') {
+        // Watch for %a[h-j%d], if % appears in the
+        // [...] range, then we need to give up, we don't know
+        // if scanf will parse it as POSIX %a [h-j %d ] or
+        // GNU allocation of string with range dh-j plus %.
+        const char *q = p + 1;
+        if (*q == '^')
+          ++q;
+        if (*q == ']')
+          ++q;
+        while (*q && *q != ']' && *q != '%')
+          ++q;
+        if (*q == 0 || *q == '%')
+          return 0;
+        p = q + 1; // Consume the closing ']'.
+        dir->maybeGnuMalloc = true;
+      }
+    }
+    break;
+  }
+  return p;
+}
+
+// Returns true if the character is an integer conversion specifier.
+static bool scanf_is_integer_conv(char c) {
+  return char_is_one_of(c, "diouxXn");
+}
+
+// Returns true if the character is a floating point conversion specifier.
+static bool scanf_is_float_conv(char c) {
+  return char_is_one_of(c, "aAeEfFgG");
+}
+
+// Returns string output character size for string-like conversions,
+// or 0 if the conversion is invalid.
+static int scanf_get_char_size(ScanfDirective *dir) {
+  if (char_is_one_of(dir->convSpecifier, "CS")) {
+    // wchar_t
+    return 0;
+  }
+
+  if (char_is_one_of(dir->convSpecifier, "cs[")) {
+    if (dir->lengthModifier[0] == 'l')
+      // wchar_t
+      return 0;
+    else if (dir->lengthModifier[0] == 0)
+      return sizeof(char);
+    else
+      return 0;
+  }
+
+  return 0;
+}
+
+enum ScanfStoreSize {
+  // Store size not known in advance; can be calculated as strlen() of the
+  // destination buffer.
+  SSS_STRLEN = -1,
+  // Invalid conversion specifier.
+  SSS_INVALID = 0
+};
+
+// Returns the store size of a scanf directive (if >0), or a value of
+// ScanfStoreSize.
+static int scanf_get_store_size(ScanfDirective *dir) {
+  if (dir->allocate) {
+    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
+      return SSS_INVALID;
+    return sizeof(char *);
+  }
+
+  if (dir->maybeGnuMalloc) {
+    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
+      return SSS_INVALID;
+    // This is ambiguous, so check the smaller size of char * (if it is
+    // a GNU extension of %as, %aS or %a[...]) and float (if it is
+    // POSIX %a followed by s, S or [ letters).
+    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
+  }
+
+  if (scanf_is_integer_conv(dir->convSpecifier)) {
+    switch (dir->lengthModifier[0]) {
+    case 'h':
+      return dir->lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
+    case 'l':
+      return dir->lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
+    case 'L':
+      return sizeof(long long);
+    case 'j':
+      return sizeof(INTMAX_T);
+    case 'z':
+      return sizeof(SIZE_T);
+    case 't':
+      return sizeof(PTRDIFF_T);
+    case 0:
+      return sizeof(int);
+    default:
+      return SSS_INVALID;
+    }
+  }
+
+  if (scanf_is_float_conv(dir->convSpecifier)) {
+    switch (dir->lengthModifier[0]) {
+    case 'L':
+    case 'q':
+      return sizeof(long double);
+    case 'l':
+      return dir->lengthModifier[1] == 'l' ? sizeof(long double)
+                                           : sizeof(double);
+    case 0:
+      return sizeof(float);
+    default:
+      return SSS_INVALID;
+    }
+  }
+
+  if (char_is_one_of(dir->convSpecifier, "sS[")) {
+    unsigned charSize = scanf_get_char_size(dir);
+    if (charSize == 0)
+      return SSS_INVALID;
+    if (dir->fieldWidth == 0)
+      return SSS_STRLEN;
+    return (dir->fieldWidth + 1) * charSize;
+  }
+
+  if (char_is_one_of(dir->convSpecifier, "cC")) {
+    unsigned charSize = scanf_get_char_size(dir);
+    if (charSize == 0)
+      return SSS_INVALID;
+    if (dir->fieldWidth == 0)
+      return charSize;
+    return dir->fieldWidth * charSize;
+  }
+
+  if (dir->convSpecifier == 'p') {
+    if (dir->lengthModifier[1] != 0)
+      return SSS_INVALID;
+    return sizeof(void *);
+  }
+
+  return SSS_INVALID;
+}
+
+// Common part of *scanf interceptors.
+// Process format string and va_list, and report all store ranges.
+// Stops when "consuming" n_inputs input items.
+static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
+                         const char *format, va_list aq) {
+  CHECK_GT(n_inputs, 0);
+  const char *p = format;
+
+  while (*p && n_inputs) {
+    ScanfDirective dir;
+    p = scanf_parse_next(p, allowGnuMalloc, &dir);
+    if (!p)
+      break;
+    if (dir.convSpecifier == 0) {
+      // This can only happen at the end of the format string.
+      CHECK_EQ(*p, 0);
+      break;
+    }
+    // Here the directive is valid. Do what it says.
+    if (dir.argIdx != -1) {
+      // Unsupported.
+      break;
+    }
+    if (dir.suppressed)
+      continue;
+    int size = scanf_get_store_size(&dir);
+    if (size == SSS_INVALID)
+      break;
+    void *argp = va_arg(aq, void *);
+    if (dir.convSpecifier != 'n')
+      --n_inputs;
+    if (size == SSS_STRLEN) {
+      size = internal_strlen((const char *)argp) + 1;
+    }
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
+  }
+}
diff --git a/lib/sanitizer_common/sanitizer_flags.cc b/lib/sanitizer_common/sanitizer_flags.cc
index eca910c..2ef4278 100644
--- a/lib/sanitizer_common/sanitizer_flags.cc
+++ b/lib/sanitizer_common/sanitizer_flags.cc
@@ -38,7 +38,8 @@
       pos += 1;
       end = internal_strchr(pos, '\'');
     } else {
-      end = internal_strchr(pos, ' ');
+      // Read until the next space or colon.
+      end = pos + internal_strcspn(pos, " :");
     }
     if (end == 0)
       end = pos + internal_strlen(pos);
diff --git a/lib/sanitizer_common/sanitizer_internal_defs.h b/lib/sanitizer_common/sanitizer_internal_defs.h
index 2e56fac..e052cbd 100644
--- a/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -13,7 +13,95 @@
 #ifndef SANITIZER_DEFS_H
 #define SANITIZER_DEFS_H
 
-#include "sanitizer/common_interface_defs.h"
+#if defined(_WIN32)
+// FIXME find out what we need on Windows. __declspec(dllexport) ?
+# define SANITIZER_INTERFACE_ATTRIBUTE
+# define SANITIZER_WEAK_ATTRIBUTE
+#elif defined(SANITIZER_GO)
+# define SANITIZER_INTERFACE_ATTRIBUTE
+# define SANITIZER_WEAK_ATTRIBUTE
+#else
+# define SANITIZER_INTERFACE_ATTRIBUTE __attribute__((visibility("default")))
+# define SANITIZER_WEAK_ATTRIBUTE  __attribute__((weak))
+#endif
+
+#ifdef __linux__
+# define SANITIZER_SUPPORTS_WEAK_HOOKS 1
+#else
+# define SANITIZER_SUPPORTS_WEAK_HOOKS 0
+#endif
+
+// GCC does not understand __has_feature
+#if !defined(__has_feature)
+# define __has_feature(x) 0
+#endif
+
+// For portability reasons we do not include stddef.h, stdint.h or any other
+// system header, but we do need some basic types that are not defined
+// in a portable way by the language itself.
+namespace __sanitizer {
+
+#if defined(_WIN64)
+// 64-bit Windows uses LLP64 data model.
+typedef unsigned long long uptr;  // NOLINT
+typedef signed   long long sptr;  // NOLINT
+#else
+typedef unsigned long uptr;  // NOLINT
+typedef signed   long sptr;  // NOLINT
+#endif  // defined(_WIN64)
+#if defined(__x86_64__)
+// Since x32 uses ILP32 data model in 64-bit hardware mode,  we must use
+// 64-bit pointer to unwind stack frame.
+typedef unsigned long long uhwptr;  // NOLINT
+#else
+typedef uptr uhwptr;   // NOLINT
+#endif
+typedef unsigned char u8;
+typedef unsigned short u16;  // NOLINT
+typedef unsigned int u32;
+typedef unsigned long long u64;  // NOLINT
+typedef signed   char s8;
+typedef signed   short s16;  // NOLINT
+typedef signed   int s32;
+typedef signed   long long s64;  // NOLINT
+typedef int fd_t;
+
+// WARNING: OFF_T may be different from OS type off_t, depending on the value of
+// _FILE_OFFSET_BITS. This definition of OFF_T matches the ABI of system calls
+// like pread and mmap, as opposed to pread64 and mmap64.
+// Mac and Linux/x86-64 are special.
+#if defined(__APPLE__) || (defined(__linux__) && defined(__x86_64__))
+typedef u64 OFF_T;
+#else
+typedef uptr OFF_T;
+#endif
+typedef u64  OFF64_T;
+}  // namespace __sanitizer
+
+extern "C" {
+  // Tell the tools to write their reports to "path.<pid>" instead of stderr.
+  void __sanitizer_set_report_path(const char *path)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  // Tell the tools to write their reports to given file descriptor instead of
+  // stderr.
+  void __sanitizer_set_report_fd(int fd)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  // Notify the tools that the sandbox is going to be turned on. The reserved
+  // parameter will be used in the future to hold a structure with functions
+  // that the tools may call to bypass the sandbox.
+  void __sanitizer_sandbox_on_notify(void *reserved)
+      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+
+  // This function is called by the tool when it has just finished reporting
+  // an error. 'error_summary' is a one-line string that summarizes
+  // the error message. This function can be overridden by the client.
+  void __sanitizer_report_error_summary(const char *error_summary)
+      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
+}  // extern "C"
+
+
 using namespace __sanitizer;  // NOLINT
 // ----------- ATTENTION -------------
 // This header should NOT include any other headers to avoid portability issues.
@@ -38,6 +126,7 @@
 # define UNLIKELY(x) (x)
 # define UNUSED
 # define USED
+# define PREFETCH(x) /* _mm_prefetch(x, _MM_HINT_NTA) */
 #else  // _MSC_VER
 # define ALWAYS_INLINE __attribute__((always_inline))
 # define ALIAS(x) __attribute__((alias(x)))
@@ -51,6 +140,12 @@
 # define UNLIKELY(x)   __builtin_expect(!!(x), 0)
 # define UNUSED __attribute__((unused))
 # define USED __attribute__((used))
+# if defined(__i386__) || defined(__x86_64__)
+// __builtin_prefetch(x) generates prefetcht0 on x86
+#  define PREFETCH(x) __asm__("prefetchnta (%0)" : : "r" (x))
+# else
+#  define PREFETCH(x) __builtin_prefetch(x)
+# endif
 #endif  // _MSC_VER
 
 #if defined(_WIN32)
diff --git a/lib/sanitizer_common/sanitizer_lfstack.h b/lib/sanitizer_common/sanitizer_lfstack.h
new file mode 100644
index 0000000..0884139
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_lfstack.h
@@ -0,0 +1,73 @@
+//===-- sanitizer_lfstack.h -=-----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Lock-free stack.
+// Uses 32/17 bits as ABA-counter on 32/64-bit platforms.
+// The memory passed to Push() must not be ever munmap'ed.
+// The type T must contain T *next field.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_LFSTACK_H
+#define SANITIZER_LFSTACK_H
+
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_common.h"
+#include "sanitizer_atomic.h"
+
+namespace __sanitizer {
+
+template<typename T>
+struct LFStack {
+  void Clear() {
+    atomic_store(&head_, 0, memory_order_relaxed);
+  }
+
+  bool Empty() const {
+    return (atomic_load(&head_, memory_order_relaxed) & kPtrMask) == 0;
+  }
+
+  void Push(T *p) {
+    u64 cmp = atomic_load(&head_, memory_order_relaxed);
+    for (;;) {
+      u64 cnt = (cmp & kCounterMask) + kCounterInc;
+      u64 xch = (u64)(uptr)p | cnt;
+      p->next = (T*)(uptr)(cmp & kPtrMask);
+      if (atomic_compare_exchange_weak(&head_, &cmp, xch,
+                                       memory_order_release))
+        break;
+    }
+  }
+
+  T *Pop() {
+    u64 cmp = atomic_load(&head_, memory_order_acquire);
+    for (;;) {
+      T *cur = (T*)(uptr)(cmp & kPtrMask);
+      if (cur == 0)
+        return 0;
+      T *nxt = cur->next;
+      u64 cnt = (cmp & kCounterMask);
+      u64 xch = (u64)(uptr)nxt | cnt;
+      if (atomic_compare_exchange_weak(&head_, &cmp, xch,
+                                       memory_order_acquire))
+        return cur;
+    }
+  }
+
+  // private:
+  static const int kCounterBits = FIRST_32_SECOND_64(32, 17);
+  static const u64 kPtrMask = ((u64)-1) >> kCounterBits;
+  static const u64 kCounterMask = ~kPtrMask;
+  static const u64 kCounterInc = kPtrMask + 1;
+
+  atomic_uint64_t head_;
+};
+}  // namespace __sanitizer
+
+#endif  // #ifndef SANITIZER_LFSTACK_H
diff --git a/lib/sanitizer_common/sanitizer_libc.cc b/lib/sanitizer_common/sanitizer_libc.cc
index 01eaef3..20c03c4 100644
--- a/lib/sanitizer_common/sanitizer_libc.cc
+++ b/lib/sanitizer_common/sanitizer_libc.cc
@@ -205,4 +205,23 @@
   }
 }
 
+bool mem_is_zero(const char *beg, uptr size) {
+  CHECK_LE(size, 1ULL << FIRST_32_SECOND_64(30, 40));  // Sanity check.
+  const char *end = beg + size;
+  uptr *aligned_beg = (uptr *)RoundUpTo((uptr)beg, sizeof(uptr));
+  uptr *aligned_end = (uptr *)RoundDownTo((uptr)end, sizeof(uptr));
+  uptr all = 0;
+  // Prologue.
+  for (const char *mem = beg; mem < (char*)aligned_beg && mem < end; mem++)
+    all |= *mem;
+  // Aligned loop.
+  for (; aligned_beg < aligned_end; aligned_beg++)
+    all |= *aligned_beg;
+  // Epilogue.
+  if ((char*)aligned_end >= beg)
+    for (const char *mem = (char*)aligned_end; mem < end; mem++)
+      all |= *mem;
+  return all == 0;
+}
+
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_libc.h b/lib/sanitizer_common/sanitizer_libc.h
index 7979483..7c2a1b8 100644
--- a/lib/sanitizer_common/sanitizer_libc.h
+++ b/lib/sanitizer_common/sanitizer_libc.h
@@ -11,14 +11,13 @@
 // run-time libraries.
 // These tools can not use some of the libc functions directly because those
 // functions are intercepted. Instead, we implement a tiny subset of libc here.
-// NOTE: This file may be included into user code.
 //===----------------------------------------------------------------------===//
 #ifndef SANITIZER_LIBC_H
 #define SANITIZER_LIBC_H
 
 // ----------- ATTENTION -------------
 // This header should NOT include any other headers from sanitizer runtime.
-#include "sanitizer/common_interface_defs.h"
+#include "sanitizer_internal_defs.h"
 
 namespace __sanitizer {
 
@@ -46,6 +45,12 @@
 char *internal_strstr(const char *haystack, const char *needle);
 // Works only for base=10 and doesn't set errno.
 s64 internal_simple_strtoll(const char *nptr, char **endptr, int base);
+int internal_snprintf(char *buffer, uptr length, const char *format, ...);
+
+// Return true if all bytes in [mem, mem+size) are zero.
+// Optimized for the case when the result is true.
+bool mem_is_zero(const char *mem, uptr size);
+
 
 // Memory
 void *internal_mmap(void *addr, uptr length, int prot, int flags,
@@ -53,20 +58,33 @@
 int internal_munmap(void *addr, uptr length);
 
 // I/O
-typedef int fd_t;
 const fd_t kInvalidFd = -1;
 const fd_t kStdinFd = 0;
 const fd_t kStdoutFd = 1;
 const fd_t kStderrFd = 2;
 int internal_close(fd_t fd);
 int internal_isatty(fd_t fd);
-fd_t internal_open(const char *filename, bool write);
+
+// Use __sanitizer::OpenFile() instead.
+fd_t internal_open(const char *filename, int flags);
+fd_t internal_open(const char *filename, int flags, u32 mode);
+
 uptr internal_read(fd_t fd, void *buf, uptr count);
 uptr internal_write(fd_t fd, const void *buf, uptr count);
+
+// OS
 uptr internal_filesize(fd_t fd);  // -1 on error.
+int internal_stat(const char *path, void *buf);
+int internal_lstat(const char *path, void *buf);
+int internal_fstat(fd_t fd, void *buf);
 int internal_dup2(int oldfd, int newfd);
 uptr internal_readlink(const char *path, char *buf, uptr bufsize);
-int internal_snprintf(char *buffer, uptr length, const char *format, ...);
+void NORETURN internal__exit(int exitcode);
+OFF_T internal_lseek(fd_t fd, OFF_T offset, int whence);
+
+long internal_ptrace(int request, int pid, void *addr, void *data);
+int internal_waitpid(int pid, int *status, int options);
+int internal_getppid();
 
 // Threading
 int internal_sched_yield();
diff --git a/lib/sanitizer_common/sanitizer_linux.cc b/lib/sanitizer_common/sanitizer_linux.cc
index 5be76e9..9b51d2c 100644
--- a/lib/sanitizer_common/sanitizer_linux.cc
+++ b/lib/sanitizer_common/sanitizer_linux.cc
@@ -16,22 +16,34 @@
 #include "sanitizer_common.h"
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_libc.h"
+#include "sanitizer_linux.h"
 #include "sanitizer_mutex.h"
 #include "sanitizer_placement_new.h"
 #include "sanitizer_procmaps.h"
+#include "sanitizer_stacktrace.h"
 
+#include <errno.h>
 #include <fcntl.h>
 #include <pthread.h>
 #include <sched.h>
 #include <sys/mman.h>
+#include <sys/ptrace.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/time.h>
 #include <sys/types.h>
-#include <unistd.h>
-#include <errno.h>
 #include <sys/prctl.h>
+#include <unistd.h>
+#include <unwind.h>
+
+#if !defined(__ANDROID__) && !defined(ANDROID)
+#include <sys/signal.h>
+#endif
+
+// <linux/futex.h> is broken on some linux distributions.
+const int FUTEX_WAIT = 0;
+const int FUTEX_WAKE = 1;
 
 // Are we using 32-bit or 64-bit syscalls?
 // x32 (which defines __x86_64__) has SANITIZER_WORDSIZE == 32
@@ -62,8 +74,16 @@
   return syscall(__NR_close, fd);
 }
 
-fd_t internal_open(const char *filename, bool write) {
-  return syscall(__NR_open, filename,
+fd_t internal_open(const char *filename, int flags) {
+  return syscall(__NR_open, filename, flags);
+}
+
+fd_t internal_open(const char *filename, int flags, u32 mode) {
+  return syscall(__NR_open, filename, flags, mode);
+}
+
+fd_t OpenFile(const char *filename, bool write) {
+  return internal_open(filename,
       write ? O_WRONLY | O_CREAT /*| O_CLOEXEC*/ : O_RDONLY, 0660);
 }
 
@@ -79,16 +99,38 @@
   return res;
 }
 
+int internal_stat(const char *path, void *buf) {  // Raw stat syscall.
+#if SANITIZER_LINUX_USES_64BIT_SYSCALLS
+  return syscall(__NR_stat, path, buf);
+#else  // !SANITIZER_LINUX_USES_64BIT_SYSCALLS
+  return syscall(__NR_stat64, path, buf);
+#endif  // SANITIZER_LINUX_USES_64BIT_SYSCALLS
+}
+
+int internal_lstat(const char *path, void *buf) {  // Raw lstat syscall.
+#if SANITIZER_LINUX_USES_64BIT_SYSCALLS
+  return syscall(__NR_lstat, path, buf);
+#else  // !SANITIZER_LINUX_USES_64BIT_SYSCALLS
+  return syscall(__NR_lstat64, path, buf);
+#endif  // SANITIZER_LINUX_USES_64BIT_SYSCALLS
+}
+
+int internal_fstat(fd_t fd, void *buf) {  // Raw fstat syscall.
+#if SANITIZER_LINUX_USES_64BIT_SYSCALLS
+  return syscall(__NR_fstat, fd, buf);
+#else  // !SANITIZER_LINUX_USES_64BIT_SYSCALLS
+  return syscall(__NR_fstat64, fd, buf);
+#endif  // SANITIZER_LINUX_USES_64BIT_SYSCALLS
+}
+
 uptr internal_filesize(fd_t fd) {
 #if SANITIZER_LINUX_USES_64BIT_SYSCALLS
   struct stat st;
-  if (syscall(__NR_fstat, fd, &st))
-    return -1;
 #else
   struct stat64 st;
-  if (syscall(__NR_fstat64, fd, &st))
-    return -1;
 #endif
+  if (internal_fstat(fd, &st))
+    return -1;
   return (uptr)st.st_size;
 }
 
@@ -104,6 +146,11 @@
   return syscall(__NR_sched_yield);
 }
 
+void internal__exit(int exitcode) {  // Exits via a raw exit_group syscall.
+  syscall(__NR_exit_group, exitcode);
+  Die();  // Unreachable.
+}
+
 // ----------------- sanitizer_common.h
 bool FileExists(const char *filename) {
 #if SANITIZER_LINUX_USES_64BIT_SYSCALLS
@@ -198,24 +245,54 @@
   return 0;  // Not found.
 }
 
-void ReExec() {
-  static const int kMaxArgv = 100;
-  InternalScopedBuffer<char*> argv(kMaxArgv + 1);
-  static char *buff;
+#ifdef __GLIBC__
+
+extern "C" {
+  extern void *__libc_stack_end;
+}
+
+static void GetArgsAndEnv(char ***argv, char ***envp) {
+  uptr *stack_end = (uptr *)__libc_stack_end;  // Assumed to point at argc.
+  int argc = *stack_end;
+  *argv = (char**)(stack_end + 1);  // argv[] starts right after argc.
+  *envp = (char**)(stack_end + argc + 2);  // Skip argc, argv[] and its NULL.
+}
+
+#else  // __GLIBC__
+
+static void ReadNullSepFileToArray(const char *path, char ***arr,
+                                   int arr_size) {  // Slots in *arr.
+  char *buff;
   uptr buff_size = 0;
-  ReadFileToBuffer("/proc/self/cmdline", &buff, &buff_size, 1024 * 1024);
-  argv[0] = buff;
-  int argc, i;
-  for (argc = 1, i = 1; ; i++) {
+  *arr = (char **)MmapOrDie(arr_size * sizeof(char *), "NullSepFileArray");
+  ReadFileToBuffer(path, &buff, &buff_size, 1024 * 1024);
+  (*arr)[0] = buff;
+  int count, i;
+  for (count = 1, i = 1; ; i++) {
     if (buff[i] == 0) {
       if (buff[i+1] == 0) break;
-      argv[argc] = &buff[i+1];
-      CHECK_LE(argc, kMaxArgv);  // FIXME: make this more flexible.
-      argc++;
+      CHECK_LT(count, arr_size - 1);  // FIXME: make this more flexible.
+      (*arr)[count] = &buff[i+1];  // In bounds; last slot is kept for NULL.
+      count++;
     }
   }
-  argv[argc] = 0;
-  execv(argv[0], argv.data());
+  (*arr)[count] = 0;  // NULL-terminate, as execve() expects.
+}
+
+static void GetArgsAndEnv(char ***argv, char ***envp) {  // Reads from /proc.
+  static const int kMaxArgv = 2000, kMaxEnvp = 2000;  // Array capacities.
+  ReadNullSepFileToArray("/proc/self/cmdline", argv, kMaxArgv);
+  ReadNullSepFileToArray("/proc/self/environ", envp, kMaxEnvp);
+}
+
+#endif  // __GLIBC__
+
+void ReExec() {  // Re-executes the current binary with the same args/env.
+  char **argv, **envp;
+  GetArgsAndEnv(&argv, &envp);
+  execve("/proc/self/exe", argv, envp);  // Returns only on failure.
+  Printf("execve failed, errno %d\n", errno);
+  Die();
 }
 
 void PrepareForSandboxing() {
@@ -366,16 +443,219 @@
 }
 
 bool SanitizerSetThreadName(const char *name) {
+#ifdef PR_SET_NAME
   return 0 == prctl(PR_SET_NAME, (unsigned long)name, 0, 0, 0);  // NOLINT
+#else
+  return false;
+#endif
 }
 
 bool SanitizerGetThreadName(char *name, int max_len) {
+#ifdef PR_GET_NAME
   char buff[17];
   if (prctl(PR_GET_NAME, (unsigned long)buff, 0, 0, 0))  // NOLINT
     return false;
   internal_strncpy(name, buff, max_len);
   name[max_len] = 0;
   return true;
+#else
+  return false;
+#endif
+}
+
+#ifndef SANITIZER_GO
+//------------------------- SlowUnwindStack -----------------------------------
+#ifdef __arm__
+#define UNWIND_STOP _URC_END_OF_STACK
+#define UNWIND_CONTINUE _URC_NO_REASON
+#else
+#define UNWIND_STOP _URC_NORMAL_STOP
+#define UNWIND_CONTINUE _URC_NO_REASON
+#endif
+
+uptr Unwind_GetIP(struct _Unwind_Context *ctx) {  // PC of the frame in |ctx|.
+#ifdef __arm__
+  uptr val;
+  _Unwind_VRS_Result res = _Unwind_VRS_Get(ctx, _UVRSC_CORE,
+      15 /* r15 = PC */, _UVRSD_UINT32, &val);
+  CHECK(res == _UVRSR_OK && "_Unwind_VRS_Get failed");
+  // Clear the Thumb bit.
+  return val & ~(uptr)1;
+#else  // !__arm__
+  return _Unwind_GetIP(ctx);
+#endif  // __arm__
+}
+
+_Unwind_Reason_Code Unwind_Trace(struct _Unwind_Context *ctx, void *param) {
+  // Per-frame callback: append this frame's PC to the StackTrace in |param|.
+  StackTrace *st = static_cast<StackTrace *>(param);
+  CHECK(st->size < st->max_size);  // There must still be room for one PC.
+  st->trace[st->size] = Unwind_GetIP(ctx);
+  st->size++;
+  return st->size == st->max_size ? UNWIND_STOP : UNWIND_CONTINUE;
+}
+
+static bool MatchPc(uptr cur_pc, uptr trace_pc) {  // PCs within 64 bytes?
+  return cur_pc - trace_pc <= 64 || trace_pc - cur_pc <= 64;
+}
+
+void StackTrace::SlowUnwindStack(uptr pc, uptr max_depth) {
+  this->size = 0;
+  this->max_size = max_depth;  // Unwind_Trace stops at this many frames.
+  if (max_depth > 1) {
+    _Unwind_Backtrace(Unwind_Trace, this);  // Fills trace[] and size.
+    // We need to pop a few frames so that pc is on top.
+    // trace[0] belongs to the current function so we always pop it.
+    int to_pop = 1;
+    /**/ if (size > 1 && MatchPc(pc, trace[1])) to_pop = 1;
+    else if (size > 2 && MatchPc(pc, trace[2])) to_pop = 2;
+    else if (size > 3 && MatchPc(pc, trace[3])) to_pop = 3;
+    else if (size > 4 && MatchPc(pc, trace[4])) to_pop = 4;
+    else if (size > 5 && MatchPc(pc, trace[5])) to_pop = 5;
+    this->PopStackFrames(to_pop);
+  }
+  this->trace[0] = pc;  // Top frame is always the caller-provided pc.
+}
+
+#endif  // #ifndef SANITIZER_GO
+
+enum MutexState {  // Values stored in the BlockingMutex futex word.
+  MtxUnlocked = 0,
+  MtxLocked = 1,  // Held; Unlock() makes no futex call for this state.
+  MtxSleeping = 2  // Held; Unlock() issues FUTEX_WAKE for this state.
+};
+
+BlockingMutex::BlockingMutex(LinkerInitialized) {
+  CHECK_EQ(owner_, 0);  // Expects zero-initialized storage; sets nothing.
+}
+
+void BlockingMutex::Lock() {  // Futex-based blocking acquire.
+  atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
+  if (atomic_exchange(m, MtxLocked, memory_order_acquire) == MtxUnlocked)
+    return;  // Fast path: the mutex was free.
+  while (atomic_exchange(m, MtxSleeping, memory_order_acquire) != MtxUnlocked)
+    syscall(__NR_futex, m, FUTEX_WAIT, MtxSleeping, 0, 0, 0);  // Then retry.
+}
+
+void BlockingMutex::Unlock() {  // Release-store, then wake one sleeper.
+  atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
+  u32 v = atomic_exchange(m, MtxUnlocked, memory_order_release);
+  CHECK_NE(v, MtxUnlocked);  // Unlocking a free mutex is a bug.
+  if (v == MtxSleeping)  // A waiter may be parked in FUTEX_WAIT.
+    syscall(__NR_futex, m, FUTEX_WAKE, 1, 0, 0, 0);
+}
+
+void BlockingMutex::CheckLocked() {  // Asserts that some thread holds it.
+  atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
+  CHECK_NE(MtxUnlocked, atomic_load(m, memory_order_relaxed));
+}
+
+// ----------------- sanitizer_linux.h
+// The actual size of this structure is specified by d_reclen.
+// Note that getdents64 uses a different structure format. We only wrap the
+// plain (non-64) getdents syscall here.
+struct linux_dirent {
+  unsigned long      d_ino;  // 0 marks an entry that GetNextTID() skips.
+  unsigned long      d_off;
+  unsigned short     d_reclen;  // Byte distance to the next record.
+  char               d_name[256];  // Entry name (a TID for thread entries).
+};
+
+// Syscall wrappers.
+long internal_ptrace(int request, int pid, void *addr, void *data) {
+  return syscall(__NR_ptrace, request, pid, addr, data);
+}
+
+int internal_waitpid(int pid, int *status, int options) {
+  return syscall(__NR_wait4, pid, status, options, NULL /* rusage */);
+}
+
+int internal_getppid() {
+  return syscall(__NR_getppid);
+}
+
+int internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) {
+  return syscall(__NR_getdents, fd, dirp, count);
+}
+
+OFF_T internal_lseek(fd_t fd, OFF_T offset, int whence) {
+  return syscall(__NR_lseek, fd, offset, whence);
+}
+
+int internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5) {
+  return syscall(__NR_prctl, option, arg2, arg3, arg4, arg5);
+}
+
+int internal_sigaltstack(const struct sigaltstack *ss,
+                         struct sigaltstack *oss) {
+  return syscall(__NR_sigaltstack, ss, oss);
+}
+
+
+// ThreadLister implementation.
+ThreadLister::ThreadLister(int pid)
+  : pid_(pid),
+    descriptor_(-1),
+    error_(true),
+    entry_((linux_dirent *)buffer_),
+    bytes_read_(0) {
+  // Open /proc/<pid>/task so that GetNextTID() can enumerate thread IDs.
+  // If the directory cannot be opened the failure is reported and error_
+  // stays true; otherwise error_ is cleared.
+  char task_dir[80];
+  internal_snprintf(task_dir, sizeof(task_dir), "/proc/%d/task/", pid);
+  descriptor_ = internal_open(task_dir, O_RDONLY | O_DIRECTORY);
+  error_ = descriptor_ < 0;
+  if (error_) {
+    Report("Can't open /proc/%d/task for reading.\n", pid);
+  }
+}
+
+int ThreadLister::GetNextTID() {
+  int tid = -1;  // Stays negative until a numeric entry is found.
+  do {
+    if (error_)
+      return -1;
+    if ((char *)entry_ >= &buffer_[bytes_read_] && !GetDirectoryEntries())
+      return -1;  // Buffer consumed and no more entries (or a read error).
+    if (entry_->d_ino != 0 && entry_->d_name[0] >= '0' &&
+        entry_->d_name[0] <= '9') {
+      // Found a valid tid.
+      tid = (int)internal_atoll(entry_->d_name);
+    }
+    entry_ = (struct linux_dirent *)(((char *)entry_) + entry_->d_reclen);
+  } while (tid < 0);  // Skip entries whose name does not start with a digit.
+  return tid;
+}
+
+void ThreadLister::Reset() {  // Restarts the listing from the first entry.
+  if (error_ || descriptor_ < 0) return;
+  internal_lseek(descriptor_, 0, SEEK_SET);
+  bytes_read_ = 0;  // Discard buffered entries; GetNextTID() will refill.
+}
+
+ThreadLister::~ThreadLister() {  // Closes the task directory, if opened.
+  if (descriptor_ >= 0)
+    internal_close(descriptor_);
+}
+
+bool ThreadLister::error() { return error_; }
+
+bool ThreadLister::GetDirectoryEntries() {  // Refills buffer_ via getdents.
+  CHECK_GE(descriptor_, 0);
+  CHECK_NE(error_, true);
+  bytes_read_ = internal_getdents(descriptor_,
+                                  (struct linux_dirent *)buffer_,
+                                  sizeof(buffer_));
+  if (bytes_read_ < 0) {
+    Report("Can't read directory entries from /proc/%d/task.\n", pid_);
+    error_ = true;
+    return false;
+  } else if (bytes_read_ == 0) {
+    return false;  // Nothing more to read; error_ is left unset.
+  }
+  entry_ = (struct linux_dirent *)buffer_;  // Restart at the first record.
+  return true;
 }
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_linux.h b/lib/sanitizer_common/sanitizer_linux.h
new file mode 100644
index 0000000..b4ac310
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_linux.h
@@ -0,0 +1,53 @@
+//===-- sanitizer_linux.h ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Linux-specific syscall wrappers and classes.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_LINUX_H
+#define SANITIZER_LINUX_H
+
+#include "sanitizer_internal_defs.h"
+
+struct sigaltstack;
+
+namespace __sanitizer {
+// Dirent structure for getdents(). Note that this structure is different from
+// the one in <dirent.h>, which is used by readdir().
+struct linux_dirent;
+
+// Syscall wrappers.
+int internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count);
+int internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5);
+int internal_sigaltstack(const struct sigaltstack *ss, struct sigaltstack *oss);
+
+// This class reads thread IDs from /proc/<pid>/task using only syscalls.
+class ThreadLister {
+ public:
+  explicit ThreadLister(int pid);
+  ~ThreadLister();
+  // GetNextTID returns -1 if the list of threads is exhausted, or if there has
+  // been an error.
+  int GetNextTID();
+  void Reset();  // Seeks the task directory back to its beginning.
+  bool error();  // True once opening or reading the directory has failed.
+
+ private:
+  bool GetDirectoryEntries();  // Reads the next chunk of entries into buffer_.
+
+  int pid_;
+  int descriptor_;  // File descriptor of /proc/<pid>/task, or negative.
+  char buffer_[4096];  // Raw linux_dirent records returned by getdents.
+  bool error_;
+  struct linux_dirent* entry_;  // Next record to examine inside buffer_.
+  int bytes_read_;  // Number of valid bytes in buffer_.
+};
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_LINUX_H
diff --git a/lib/sanitizer_common/sanitizer_list.h b/lib/sanitizer_common/sanitizer_list.h
index ef98eee..f61d28f 100644
--- a/lib/sanitizer_common/sanitizer_list.h
+++ b/lib/sanitizer_common/sanitizer_list.h
@@ -72,6 +72,8 @@
 
   void append_front(IntrusiveList<Item> *l) {
     CHECK_NE(this, l);
+    if (l->empty())
+      return;
     if (empty()) {
       *this = *l;
     } else if (!l->empty()) {
@@ -84,6 +86,8 @@
 
   void append_back(IntrusiveList<Item> *l) {
     CHECK_NE(this, l);
+    if (l->empty())
+      return;
     if (empty()) {
       *this = *l;
     } else {
diff --git a/lib/sanitizer_common/sanitizer_mac.cc b/lib/sanitizer_common/sanitizer_mac.cc
index e156eaa..0f11220 100644
--- a/lib/sanitizer_common/sanitizer_mac.cc
+++ b/lib/sanitizer_common/sanitizer_mac.cc
@@ -13,6 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #ifdef __APPLE__
+// Use 64-bit inodes in file operations. ASan does not support OS X 10.5, so
+// the clients will most certainly use 64-bit ones as well.
+#ifndef _DARWIN_USE_64_BIT_INODE
+#define _DARWIN_USE_64_BIT_INODE 1
+#endif
+#include <stdio.h>
 
 #include "sanitizer_common.h"
 #include "sanitizer_internal_defs.h"
@@ -30,6 +36,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
+#include <libkern/OSAtomic.h>
 
 namespace __sanitizer {
 
@@ -47,9 +54,17 @@
   return close(fd);
 }
 
-fd_t internal_open(const char *filename, bool write) {
-  return open(filename,
-              write ? O_WRONLY | O_CREAT : O_RDONLY, 0660);
+fd_t internal_open(const char *filename, int flags) {
+  return open(filename, flags);
+}
+
+fd_t internal_open(const char *filename, int flags, u32 mode) {
+  return open(filename, flags, mode);
+}
+
+fd_t OpenFile(const char *filename, bool write) {
+  return internal_open(filename,
+      write ? O_WRONLY | O_CREAT : O_RDONLY, 0660);
 }
 
 uptr internal_read(fd_t fd, void *buf, uptr count) {
@@ -60,9 +75,21 @@
   return write(fd, buf, count);
 }
 
+int internal_stat(const char *path, void *buf) {
+  return stat(path, (struct stat *)buf);
+}
+
+int internal_lstat(const char *path, void *buf) {
+  return lstat(path, (struct stat *)buf);
+}
+
+int internal_fstat(fd_t fd, void *buf) {
+  return fstat(fd, (struct stat *)buf);
+}
+
 uptr internal_filesize(fd_t fd) {
   struct stat st;
-  if (fstat(fd, &st))
+  if (internal_fstat(fd, &st))
     return -1;
   return (uptr)st.st_size;
 }
@@ -79,6 +106,10 @@
   return sched_yield();
 }
 
+void internal__exit(int exitcode) {
+  _exit(exitcode);
+}
+
 // ----------------- sanitizer_common.h
 bool FileExists(const char *filename) {
   struct stat st;
@@ -267,6 +298,29 @@
   return IterateForObjectNameAndOffset(addr, offset, filename, filename_size);
 }
 
+BlockingMutex::BlockingMutex(LinkerInitialized) {
+  // We assume that OS_SPINLOCK_INIT is zero
+}
+
+void BlockingMutex::Lock() {
+  CHECK(sizeof(OSSpinLock) <= sizeof(opaque_storage_));
+  CHECK_EQ(OS_SPINLOCK_INIT, 0);
+  CHECK_NE(owner_, (uptr)pthread_self());  // Recursive locking is a bug.
+  OSSpinLockLock((OSSpinLock*)&opaque_storage_);
+  CHECK(!owner_);
+  owner_ = (uptr)pthread_self();  // Recorded for Unlock()/CheckLocked().
+}
+
+void BlockingMutex::Unlock() {
+  CHECK(owner_ == (uptr)pthread_self());  // Only the owner may unlock.
+  owner_ = 0;  // Cleared before the spin lock itself is released.
+  OSSpinLockUnlock((OSSpinLock*)&opaque_storage_);
+}
+
+void BlockingMutex::CheckLocked() {
+  CHECK_EQ((uptr)pthread_self(), owner_);
+}
+
 }  // namespace __sanitizer
 
 #endif  // __APPLE__
diff --git a/lib/sanitizer_common/sanitizer_mutex.h b/lib/sanitizer_common/sanitizer_mutex.h
index 6b6c25d..be3d559 100644
--- a/lib/sanitizer_common/sanitizer_mutex.h
+++ b/lib/sanitizer_common/sanitizer_mutex.h
@@ -27,11 +27,15 @@
   }
 
   void Lock() {
-    if (atomic_exchange(&state_, 1, memory_order_acquire) == 0)
+    if (TryLock())
       return;
     LockSlow();
   }
 
+  bool TryLock() {  // Returns true iff the lock was acquired; never spins.
+    return atomic_exchange(&state_, 1, memory_order_acquire) == 0;
+  }
+
   void Unlock() {
     atomic_store(&state_, 0, memory_order_release);
   }
@@ -63,6 +67,17 @@
   void operator=(const SpinMutex&);
 };
 
+class BlockingMutex {
+ public:
+  explicit BlockingMutex(LinkerInitialized);
+  void Lock();
+  void Unlock();
+  void CheckLocked();  // CHECK-fails unless locked (on Mac: by the caller).
+ private:
+  uptr opaque_storage_[10];  // Platform lock state (futex word / OSSpinLock).
+  uptr owner_;  // for debugging
+};
+
 template<typename MutexType>
 class GenericScopedLock {
  public:
@@ -102,6 +117,7 @@
 };
 
 typedef GenericScopedLock<StaticSpinMutex> SpinMutexLock;
+typedef GenericScopedLock<BlockingMutex> BlockingMutexLock;
 
 }  // namespace __sanitizer
 
diff --git a/lib/sanitizer_common/sanitizer_platform_interceptors.h b/lib/sanitizer_common/sanitizer_platform_interceptors.h
new file mode 100644
index 0000000..f5cf9a7
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -0,0 +1,48 @@
+//===-- sanitizer_platform_interceptors.h -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines macros telling whether sanitizer tools can/should intercept
+// given library functions on a given platform.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_internal_defs.h"
+
+#if !defined(_WIN32)
+# define SI_NOT_WINDOWS 1
+# include "sanitizer_platform_limits_posix.h"
+#else
+# define SI_NOT_WINDOWS 0
+#endif
+
+#if defined(__linux__) && !defined(__ANDROID__) && !defined(ANDROID)
+# define SI_LINUX_NOT_ANDROID 1
+#else  // Android (either macro spelling) or not Linux.
+# define SI_LINUX_NOT_ANDROID 0
+#endif
+
+#if defined(__linux__)
+# define SI_LINUX 1
+#else
+# define SI_LINUX 0
+#endif
+
+# define SANITIZER_INTERCEPT_READ   SI_NOT_WINDOWS
+# define SANITIZER_INTERCEPT_PREAD  SI_NOT_WINDOWS
+# define SANITIZER_INTERCEPT_WRITE  SI_NOT_WINDOWS
+# define SANITIZER_INTERCEPT_PWRITE SI_NOT_WINDOWS
+
+# define SANITIZER_INTERCEPT_PREAD64 SI_LINUX_NOT_ANDROID
+# define SANITIZER_INTERCEPT_PWRITE64 SI_LINUX_NOT_ANDROID
+# define SANITIZER_INTERCEPT_PRCTL   SI_LINUX_NOT_ANDROID
+
+# define SANITIZER_INTERCEPT_LOCALTIME_AND_FRIENDS SI_NOT_WINDOWS
+
+# define SANITIZER_INTERCEPT_SCANF SI_NOT_WINDOWS
+# define SANITIZER_INTERCEPT_ISOC99_SCANF SI_LINUX
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
new file mode 100644
index 0000000..0ba71a8
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -0,0 +1,70 @@
+//===-- sanitizer_platform_limits_posix.cc --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of Sanitizer common code.
+//
+// Sizes and layouts of platform-specific POSIX data structures.
+//===----------------------------------------------------------------------===//
+
+#if defined(__linux__) || defined(__APPLE__)
+
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_platform_limits_posix.h"
+
+#include <dirent.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <time.h>
+
+#if defined(__linux__)
+#include <sys/vfs.h>
+#include <sys/epoll.h>
+#endif // __linux__
+
+namespace __sanitizer {
+  unsigned struct_utsname_sz = sizeof(struct utsname);
+  unsigned struct_stat_sz = sizeof(struct stat);
+  unsigned struct_stat64_sz = sizeof(struct stat64);
+  unsigned struct_rusage_sz = sizeof(struct rusage);
+  unsigned struct_tm_sz = sizeof(struct tm);
+
+#if defined(__linux__)
+  unsigned struct_rlimit_sz = sizeof(struct rlimit);
+  unsigned struct_dirent_sz = sizeof(struct dirent);
+  unsigned struct_statfs_sz = sizeof(struct statfs);
+  unsigned struct_epoll_event_sz = sizeof(struct epoll_event);
+#endif // __linux__
+
+#if defined(__linux__) && !defined(__ANDROID__)
+  unsigned struct_rlimit64_sz = sizeof(struct rlimit64);
+  unsigned struct_statfs64_sz = sizeof(struct statfs64);
+#endif // __linux__ && !__ANDROID__
+
+  void* __sanitizer_get_msghdr_iov_iov_base(void* msg, int idx) {
+    return ((struct msghdr *)msg)->msg_iov[idx].iov_base;
+  }
+
+  uptr __sanitizer_get_msghdr_iov_iov_len(void* msg, int idx) {
+    return ((struct msghdr *)msg)->msg_iov[idx].iov_len;
+  }
+
+  uptr __sanitizer_get_msghdr_iovlen(void* msg) {
+    return ((struct msghdr *)msg)->msg_iovlen;
+  }
+
+  uptr __sanitizer_get_socklen_t(void* socklen_ptr) {
+    return *(socklen_t*)socklen_ptr;
+  }
+}  // namespace __sanitizer
+
+#endif  // __linux__ || __APPLE__
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
new file mode 100644
index 0000000..dd00663
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -0,0 +1,43 @@
+//===-- sanitizer_platform_limits_posix.h ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of Sanitizer common code.
+//
+// Sizes and layouts of platform-specific POSIX data structures.
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_PLATFORM_LIMITS_POSIX_H
+#define SANITIZER_PLATFORM_LIMITS_POSIX_H
+
+namespace __sanitizer {
+  extern unsigned struct_utsname_sz;
+  extern unsigned struct_stat_sz;
+  extern unsigned struct_stat64_sz;
+  extern unsigned struct_rusage_sz;
+  extern unsigned struct_tm_sz;
+
+#if defined(__linux__)
+  extern unsigned struct_rlimit_sz;
+  extern unsigned struct_dirent_sz;
+  extern unsigned struct_statfs_sz;
+  extern unsigned struct_epoll_event_sz;
+#endif // __linux__
+
+#if defined(__linux__) && !defined(__ANDROID__)
+  extern unsigned struct_rlimit64_sz;
+  extern unsigned struct_statfs64_sz;
+#endif // __linux__ && !__ANDROID__
+
+  void* __sanitizer_get_msghdr_iov_iov_base(void* msg, int idx);
+  uptr __sanitizer_get_msghdr_iov_iov_len(void* msg, int idx);
+  uptr __sanitizer_get_msghdr_iovlen(void* msg);
+  uptr __sanitizer_get_socklen_t(void* socklen_ptr);
+}  // namespace __sanitizer
+
+#endif
diff --git a/lib/sanitizer_common/sanitizer_posix.cc b/lib/sanitizer_common/sanitizer_posix.cc
index d5cf999..773f376 100644
--- a/lib/sanitizer_common/sanitizer_posix.cc
+++ b/lib/sanitizer_common/sanitizer_posix.cc
@@ -44,6 +44,10 @@
   return getpid();
 }
 
+u32 GetUid() {
+  return getuid();
+}
+
 uptr GetThreadSelf() {
   return (uptr)pthread_self();
 }
@@ -58,12 +62,12 @@
     if (recursion_count) {
       // The Report() and CHECK calls below may call mmap recursively and fail.
       // If we went into recursion, just die.
-      RawWrite("AddressSanitizer is unable to mmap\n");
+      RawWrite("ERROR: Failed to mmap\n");
       Die();
     }
     recursion_count++;
-    Report("ERROR: Failed to allocate 0x%zx (%zd) bytes of %s: %s\n",
-           size, size, mem_type, strerror(errno));
+    Report("ERROR: %s failed to allocate 0x%zx (%zd) bytes of %s: %d\n",
+           SanitizerToolName, size, size, mem_type, errno);
     DumpProcessMap();
     CHECK("unable to mmap" && 0);
   }
@@ -74,8 +78,8 @@
   if (!addr || !size) return;
   int res = internal_munmap(addr, size);
   if (res != 0) {
-    Report("ERROR: Failed to deallocate 0x%zx (%zd) bytes at address %p\n",
-           size, size, addr);
+    Report("ERROR: %s failed to deallocate 0x%zx (%zd) bytes at address %p\n",
+           SanitizerToolName, size, size, addr);
     CHECK("unable to unmap" && 0);
   }
 }
@@ -88,8 +92,25 @@
       MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE,
       -1, 0);
   if (p == (void*)-1)
-    Report("ERROR: Failed to allocate 0x%zx (%zd) bytes at address %p (%d)\n",
-           size, size, fixed_addr, errno);
+    Report("ERROR: "
+           "%s failed to allocate 0x%zx (%zd) bytes at address %p (%d)\n",
+           SanitizerToolName, size, size, fixed_addr, errno);
+  return p;
+}
+
+void *MmapFixedOrDie(uptr fixed_addr, uptr size) {  // CHECK-fails on error.
+  uptr PageSize = GetPageSizeCached();
+  void *p = internal_mmap((void*)(fixed_addr & ~(PageSize - 1)),  // Align down.
+      RoundUpTo(size, PageSize),  // NOTE(review): page offset not added.
+      PROT_READ | PROT_WRITE,
+      MAP_PRIVATE | MAP_ANON | MAP_FIXED,
+      -1, 0);
+  if (p == (void*)-1) {
+    Report("ERROR:"
+           " %s failed to allocate 0x%zx (%zd) bytes at address %p (%d)\n",
+           SanitizerToolName, size, size, fixed_addr, errno);
+    CHECK("unable to mmap" && 0);  // Condition is always false.
+  }
   return p;
 }
 
@@ -100,8 +121,12 @@
                        -1, 0);
 }
 
+void FlushUnneededShadowMemory(uptr addr, uptr size) {
+  madvise((void*)addr, size, MADV_DONTNEED);  // Result is ignored.
+}
+
 void *MapFileToMemory(const char *file_name, uptr *buff_size) {
-  fd_t fd = internal_open(file_name, false);
+  fd_t fd = OpenFile(file_name, false);
   CHECK_NE(fd, kInvalidFd);
   uptr fsize = internal_filesize(fd);
   CHECK_NE(fsize, (uptr)-1);
@@ -170,7 +195,7 @@
   rlim.rlim_cur = limit;
   rlim.rlim_max = limit;
   if (setrlimit(RLIMIT_STACK, &rlim)) {
-    Report("setrlimit() failed %d\n", errno);
+    Report("ERROR: %s setrlimit() failed %d\n", SanitizerToolName, errno);
     Die();
   }
   CHECK(!StackSizeIsUnlimited());
@@ -184,10 +209,6 @@
   usleep(millis * 1000);
 }
 
-void Exit(int exitcode) {
-  _exit(exitcode);
-}
-
 void Abort() {
   abort();
 }
diff --git a/lib/sanitizer_common/sanitizer_printf.cc b/lib/sanitizer_common/sanitizer_printf.cc
index b671298..2b00cae 100644
--- a/lib/sanitizer_common/sanitizer_printf.cc
+++ b/lib/sanitizer_common/sanitizer_printf.cc
@@ -94,7 +94,7 @@
 int VSNPrintf(char *buff, int buff_length,
               const char *format, va_list args) {
   static const char *kPrintfFormatsHelp =
-    "Supported Printf formats: %%(0[0-9]*)?(z|ll)?{d,u,x}; %%p; %%s; %%c\n";
+    "Supported Printf formats: %(0[0-9]*)?(z|ll)?{d,u,x}; %p; %s; %c\n";
   RAW_CHECK(format);
   RAW_CHECK(buff_length > 0);
   const char *buff_end = &buff[buff_length - 1];
@@ -201,19 +201,51 @@
 // Like Printf, but prints the current PID before the output string.
 void Report(const char *format, ...) {
   const int kLen = 16 * 1024;
-  InternalScopedBuffer<char> buffer(kLen);
-  int needed_length = internal_snprintf(buffer.data(),
-                                        kLen, "==%d== ", GetPid());
-  RAW_CHECK_MSG(needed_length < kLen, "Buffer in Report is too short!\n");
-  va_list args;
-  va_start(args, format);
-  needed_length += VSNPrintf(buffer.data() + needed_length,
-                             kLen - needed_length, format, args);
-  va_end(args);
-  RAW_CHECK_MSG(needed_length < kLen, "Buffer in Report is too short!\n");
-  RawWrite(buffer.data());
-  if (PrintfAndReportCallback)
-    PrintfAndReportCallback(buffer.data());
+  // |local_buffer| is small enough not to overflow the stack and/or violate
+  // the stack limit enforced by TSan (-Wframe-larger-than=512). On the other
+  // hand, the bigger the buffer is, the more the chance the error report will
+  // fit into it.
+  char local_buffer[400];
+  int needed_length;
+  int pid = GetPid();
+  char *buffer = local_buffer;
+  int cur_size = sizeof(local_buffer) / sizeof(char);
+  for (int use_mmap = 0; use_mmap < 2; use_mmap++) {
+    needed_length = internal_snprintf(buffer, cur_size,
+                                      "==%d==", pid);
+    if (needed_length >= cur_size) {
+      if (use_mmap) {
+        RAW_CHECK_MSG(needed_length < kLen, "Buffer in Report is too short!\n");
+      } else {
+        // The pid doesn't fit into the local buffer.
+        continue;
+      }
+    }
+    va_list args;
+    va_start(args, format);
+    needed_length += VSNPrintf(buffer + needed_length,
+                               cur_size - needed_length, format, args);
+    va_end(args);
+    if (needed_length >= cur_size) {
+      if (use_mmap) {
+        RAW_CHECK_MSG(needed_length < kLen, "Buffer in Report is too short!\n");
+      } else {
+        // The error message doesn't fit into the local buffer - allocate a bigger one.
+        buffer = (char*)MmapOrDie(kLen, "Report");
+        cur_size = kLen;
+        continue;
+      }
+    } else {
+      RawWrite(buffer);
+      if (PrintfAndReportCallback)
+        PrintfAndReportCallback(buffer);
+      // Don't do anything for the second time if the first iteration
+      // succeeded.
+      break;
+    }
+  }
+  // If we had mapped any memory, clean up.
+  if (buffer != local_buffer) UnmapOrDie((void*)buffer, cur_size);
 }
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_quarantine.h b/lib/sanitizer_common/sanitizer_quarantine.h
new file mode 100644
index 0000000..599d136
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_quarantine.h
@@ -0,0 +1,172 @@
+//===-- sanitizer_quarantine.h ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Memory quarantine for AddressSanitizer and potentially other tools.
+// Quarantine caches some specified amount of memory in per-thread caches,
+// then evicts to global FIFO queue. When the queue reaches specified threshold,
+// oldest memory is recycled.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_QUARANTINE_H
+#define SANITIZER_QUARANTINE_H
+
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_mutex.h"
+#include "sanitizer_list.h"
+
+namespace __sanitizer {
+
+template<typename Node> class QuarantineCache;
+
+struct QuarantineBatch {
+  static const uptr kSize = 1024;  // Number of slots in |batch|.
+  QuarantineBatch *next;  // Presumably the IntrusiveList link -- TODO confirm.
+  uptr size;   // Sum of the |size| values enqueued into this batch.
+  uptr count;  // Number of slots of |batch| that are in use.
+  void *batch[kSize];
+};
+
+// Global quarantine. Put() fills a per-thread cache; Drain() moves it to the
+// shared FIFO and, past max_size_, recycles the oldest batches to min_size_.
+// Callback interface: void Recycle(Node *ptr); void *Allocate(uptr size);
+//                     void Deallocate(void *ptr);
+template<typename Callback, typename Node>
+class Quarantine {
+ public:
+  typedef QuarantineCache<Callback> Cache;
+
+  explicit Quarantine(LinkerInitialized)
+      : cache_(LINKER_INITIALIZED) {
+  }
+
+  void Init(uptr size, uptr cache_size) {
+    max_size_ = size;
+    min_size_ = size / 10 * 9;  // 90% of max size.
+    max_cache_size_ = cache_size;
+  }
+
+  void Put(Cache *c, Callback cb, Node *ptr, uptr size) {
+    c->Enqueue(cb, ptr, size);
+    if (c->Size() > max_cache_size_)
+      Drain(c, cb);
+  }
+
+  void NOINLINE Drain(Cache *c, Callback cb) {
+    {
+      SpinMutexLock l(&cache_mutex_);
+      cache_.Transfer(c);
+    }
+    if (cache_.Size() > max_size_ && recycle_mutex_.TryLock())
+      Recycle(cb);  // Recycle() releases recycle_mutex_.
+  }
+
+ private:
+  // Read-only data.
+  char pad0_[kCacheLineSize];
+  uptr max_size_;
+  uptr min_size_;
+  uptr max_cache_size_;
+  char pad1_[kCacheLineSize];
+  SpinMutex cache_mutex_;
+  SpinMutex recycle_mutex_;
+  Cache cache_;
+  char pad2_[kCacheLineSize];
+
+  void NOINLINE Recycle(Callback cb) {
+    Cache tmp;  // Batches are moved here so they are recycled unlocked.
+    {
+      SpinMutexLock l(&cache_mutex_);
+      while (cache_.Size() > min_size_) {
+        QuarantineBatch *b = cache_.DequeueBatch();
+        tmp.EnqueueBatch(b);
+      }
+    }
+    recycle_mutex_.Unlock();  // Taken by the TryLock() in Drain().
+    DoRecycle(&tmp, cb);
+  }
+
+  void NOINLINE DoRecycle(Cache *c, Callback cb) {
+    while (QuarantineBatch *b = c->DequeueBatch()) {
+      const uptr kPrefetch = 16;  // Look-ahead distance, in batch entries.
+      for (uptr i = 0; i < kPrefetch; i++)
+        PREFETCH(b->batch[i]);
+      for (uptr i = 0; i < b->count; i++) {
+        if (i + kPrefetch < b->count) PREFETCH(b->batch[i + kPrefetch]);
+        cb.Recycle((Node*)b->batch[i]);
+      }
+      cb.Deallocate(b);
+    }
+  }
+};
+
+// Per-thread cache of memory blocks, kept as a list of QuarantineBatch nodes.
+template<typename Callback>
+class QuarantineCache {
+ public:
+  explicit QuarantineCache(LinkerInitialized) {
+  }  // NOTE(review): initializes nothing; presumably relies on zeroed storage.
+
+  QuarantineCache()
+      : size_() {
+    list_.clear();
+  }
+
+  uptr Size() const {  // Sum of the |size| values currently enqueued.
+    return atomic_load(&size_, memory_order_relaxed);
+  }
+
+  void Enqueue(Callback cb, void *ptr, uptr size) {
+    if (list_.empty() || list_.back()->count == QuarantineBatch::kSize)
+      AllocBatch(cb);  // Start a new batch when there is none or it is full.
+    QuarantineBatch *b = list_.back();
+    b->batch[b->count++] = ptr;
+    b->size += size;
+    SizeAdd(size);
+  }
+
+  void Transfer(QuarantineCache *c) {
+    list_.append_back(&c->list_);
+    SizeAdd(c->Size());  // Account here for what was appended from |c|.
+    atomic_store(&c->size_, 0, memory_order_relaxed);
+  }
+
+  void EnqueueBatch(QuarantineBatch *b) {
+    list_.push_back(b);
+    SizeAdd(b->size);
+  }
+
+  QuarantineBatch *DequeueBatch() {
+    if (list_.empty())
+      return 0;  // Nothing queued.
+    QuarantineBatch *b = list_.front();
+    list_.pop_front();
+    SizeAdd(-b->size);  // Negated before the add, so this subtracts b->size.
+    return b;
+  }
+
+ private:
+  IntrusiveList<QuarantineBatch> list_;
+  atomic_uintptr_t size_;
+
+  void SizeAdd(uptr add) {  // Load + store, not an atomic read-modify-write.
+    atomic_store(&size_, Size() + add, memory_order_relaxed);
+  }
+
+  NOINLINE QuarantineBatch* AllocBatch(Callback cb) {
+    QuarantineBatch *b = (QuarantineBatch *)cb.Allocate(sizeof(*b));
+    b->count = 0;
+    b->size = 0;
+    list_.push_back(b);  // The new batch becomes list_.back().
+    return b;
+  }
+};
+}  // namespace __sanitizer
+
+#endif  // #ifndef SANITIZER_QUARANTINE_H
diff --git a/lib/sanitizer_common/sanitizer_report_decorator.h b/lib/sanitizer_common/sanitizer_report_decorator.h
new file mode 100644
index 0000000..49334d5
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_report_decorator.h
@@ -0,0 +1,39 @@
+//===-- sanitizer_report_decorator.h ----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tags to decorate the sanitizer reports.
+// Currently supported tags:
+//   * None.
+//   * ANSI color sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_REPORT_DECORATOR_H
+#define SANITIZER_REPORT_DECORATOR_H
+
+namespace __sanitizer {
+class AnsiColorDecorator {
+ public:
+  explicit AnsiColorDecorator(bool use_ansi_colors) : ansi_(use_ansi_colors) { }
+  const char *Bold()    const { return ansi_ ? "\033[1m" : ""; }
+  const char *Black()   const { return ansi_ ? "\033[1m\033[30m" : ""; }
+  const char *Red()     const { return ansi_ ? "\033[1m\033[31m" : ""; }
+  const char *Green()   const { return ansi_ ? "\033[1m\033[32m" : ""; }
+  const char *Yellow()  const { return ansi_ ? "\033[1m\033[33m" : ""; }
+  const char *Blue()    const { return ansi_ ? "\033[1m\033[34m" : ""; }
+  const char *Magenta() const { return ansi_ ? "\033[1m\033[35m" : ""; }
+  const char *Cyan()    const { return ansi_ ? "\033[1m\033[36m" : ""; }
+  const char *White()   const { return ansi_ ? "\033[1m\033[37m" : ""; }
+  const char *Default() const { return ansi_ ? "\033[1m\033[0m"  : ""; }
+ private:
+  bool ansi_;
+};
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_REPORT_DECORATOR_H
diff --git a/lib/sanitizer_common/sanitizer_stackdepot.cc b/lib/sanitizer_common/sanitizer_stackdepot.cc
index 6fb3d2d..08e5238 100644
--- a/lib/sanitizer_common/sanitizer_stackdepot.cc
+++ b/lib/sanitizer_common/sanitizer_stackdepot.cc
@@ -42,6 +42,12 @@
   atomic_uint32_t seq[kPartCount];  // Unique id generators.
 } depot;
 
+static StackDepotStats stats;
+
+StackDepotStats *StackDepotGetStats() {
+  return &stats;
+}
+
 static u32 hash(const uptr *stack, uptr size) {
   // murmur2
   const u32 m = 0x5bd1e995;
@@ -77,7 +83,7 @@
 }
 
 static StackDesc *allocDesc(uptr size) {
-  // Frist, try to allocate optimisitically.
+  // First, try to allocate optimistically.
   uptr memsz = sizeof(StackDesc) + (size - 1) * sizeof(uptr);
   StackDesc *s = tryallocDesc(memsz);
   if (s)
@@ -93,6 +99,7 @@
     if (allocsz < memsz)
       allocsz = memsz;
     uptr mem = (uptr)MmapOrDie(allocsz, "stack depot");
+    stats.mapped += allocsz;
     atomic_store(&depot.region_end, mem + allocsz, memory_order_release);
     atomic_store(&depot.region_pos, mem, memory_order_release);
   }
@@ -156,6 +163,7 @@
   }
   uptr part = (h % kTabSize) / kPartSize;
   id = atomic_fetch_add(&depot.seq[part], 1, memory_order_relaxed) + 1;
+  stats.n_uniq_ids++;
   CHECK_LT(id, kMaxId);
   id |= part << kPartShift;
   CHECK_NE(id, 0);
diff --git a/lib/sanitizer_common/sanitizer_stackdepot.h b/lib/sanitizer_common/sanitizer_stackdepot.h
index 98db08a..5915fdb 100644
--- a/lib/sanitizer_common/sanitizer_stackdepot.h
+++ b/lib/sanitizer_common/sanitizer_stackdepot.h
@@ -13,7 +13,7 @@
 #ifndef SANITIZER_STACKDEPOT_H
 #define SANITIZER_STACKDEPOT_H
 
-#include "sanitizer/common_interface_defs.h"
+#include "sanitizer_internal_defs.h"
 
 namespace __sanitizer {
 
@@ -24,6 +24,13 @@
 // Retrieves a stored stack trace by the id.
 const uptr *StackDepotGet(u32 id, uptr *size);
 
+struct StackDepotStats {
+  uptr n_uniq_ids;
+  uptr mapped;
+};
+
+StackDepotStats *StackDepotGetStats();
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_STACKDEPOT_H
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.cc b/lib/sanitizer_common/sanitizer_stacktrace.cc
index 7525895..6309b23 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace.cc
@@ -17,18 +17,16 @@
 #include "sanitizer_symbolizer.h"
 
 namespace __sanitizer {
-static const char *StripPathPrefix(const char *filepath,
-                                   const char *strip_file_prefix) {
+const char *StripPathPrefix(const char *filepath,
+                            const char *strip_file_prefix) {
+  if (filepath == 0) return 0;
   if (filepath == internal_strstr(filepath, strip_file_prefix))
     return filepath + internal_strlen(strip_file_prefix);
   return filepath;
 }
 
 // ----------------------- StackTrace ----------------------------- {{{1
-// PCs in stack traces are actually the return addresses, that is,
-// addresses of the next instructions after the call. That's why we
-// decrement them.
-static uptr patch_pc(uptr pc) {
+uptr StackTrace::GetPreviousInstructionPc(uptr pc) {
 #ifdef __arm__
   // Cancel Thumb bit.
   pc = pc & (~1);
@@ -71,7 +69,9 @@
   InternalScopedBuffer<AddressInfo> addr_frames(64);
   uptr frame_num = 0;
   for (uptr i = 0; i < size && addr[i]; i++) {
-    uptr pc = patch_pc(addr[i]);
+    // PCs in stack traces are actually the return addresses, that is,
+    // addresses of the next instructions after the call.
+    uptr pc = GetPreviousInstructionPc(addr[i]);
     uptr addr_frames_num = 0;  // The number of stack frames for current
                                // instruction address.
     if (symbolize_callback) {
@@ -131,8 +131,9 @@
   CHECK(size == 0 && trace[0] == pc);
   size = 1;
   uhwptr *frame = (uhwptr *)bp;
-  uhwptr *prev_frame = frame;
-  while (frame >= prev_frame &&
+  uhwptr *prev_frame = frame - 1;
+  // Avoid infinite loop when frame == frame[0] by using frame > prev_frame.
+  while (frame > prev_frame &&
          frame < (uhwptr *)stack_top - 2 &&
          frame > (uhwptr *)stack_bottom &&
          size < max_size) {
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.h b/lib/sanitizer_common/sanitizer_stacktrace.h
index fe2dcf0..b9ea9b4 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.h
+++ b/lib/sanitizer_common/sanitizer_stacktrace.h
@@ -44,10 +44,12 @@
   }
 
   void FastUnwindStack(uptr pc, uptr bp, uptr stack_top, uptr stack_bottom);
+  void SlowUnwindStack(uptr pc, uptr max_depth);
 
   void PopStackFrames(uptr count);
 
   static uptr GetCurrentPc();
+  static uptr GetPreviousInstructionPc(uptr pc);
 
   static uptr CompressStack(StackTrace *stack,
                             u32 *compressed, uptr size);
@@ -55,6 +57,10 @@
                               u32 *compressed, uptr size);
 };
 
+
+const char *StripPathPrefix(const char *filepath,
+                            const char *strip_file_prefix);
+
 }  // namespace __sanitizer
 
 // Use this macro if you want to print stack trace with the caller
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.cc b/lib/sanitizer_common/sanitizer_symbolizer.cc
index d52cd07..5609e91 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer.cc
@@ -68,7 +68,18 @@
   char *buff;
   const char *ret = ExtractToken(str, delims, &buff);
   if (buff != 0) {
-    *result = internal_atoll(buff);
+    *result = (int)internal_atoll(buff);
+  }
+  InternalFree(buff);
+  return ret;
+}
+
+static const char *ExtractUptr(const char *str, const char *delims,
+                               uptr *result) {
+  char *buff;
+  const char *ret = ExtractToken(str, delims, &buff);
+  if (buff != 0) {
+    *result = (uptr)internal_atoll(buff);
   }
   InternalFree(buff);
   return ret;
@@ -98,26 +109,140 @@
     CHECK_NE(output_fd_, kInvalidFd);
   }
 
-  // Returns the number of frames for a given address, or zero if
-  // symbolization failed.
-  uptr SymbolizeCode(uptr addr, const char *module_name, uptr module_offset,
-                     AddressInfo *frames, uptr max_frames) {
+  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
     CHECK(module_name);
-    // FIXME: Make sure this buffer always has sufficient size to hold
-    // large debug info.
-    static const int kMaxBufferSize = 4096;
-    InternalScopedBuffer<char> buffer(kMaxBufferSize);
-    char *buffer_data = buffer.data();
-    internal_snprintf(buffer_data, kMaxBufferSize, "%s 0x%zx\n",
-                      module_name, module_offset);
-    if (!writeToSymbolizer(buffer_data, internal_strlen(buffer_data)))
+    internal_snprintf(buffer_, kBufferSize, "%s%s 0x%zx\n",
+                      is_data ? "DATA " : "", module_name, module_offset);
+    if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
       return 0;
+    if (!readFromSymbolizer(buffer_, kBufferSize))
+      return 0;
+    return buffer_;
+  }
 
-    if (!readFromSymbolizer(buffer_data, kMaxBufferSize))
+  bool Restart() {
+    if (times_restarted_ >= kMaxTimesRestarted) return false;
+    times_restarted_++;
+    internal_close(input_fd_);
+    internal_close(output_fd_);
+    return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
+  }
+
+ private:
+  // Reads one reply into |buffer| and NUL-terminates it; false on failure.
+  bool readFromSymbolizer(char *buffer, uptr max_length) {
+    if (max_length == 0) return true;
+    uptr read_len = 0;
+    while (true) {
+      // Read at most max_length - 1 bytes in total: one is kept for the NUL.
+      uptr just_read = internal_read(input_fd_, buffer + read_len,
+                                     max_length - read_len - 1);
+      // We can't read 0 bytes, as we don't expect external symbolizer to close
+      // its stdout.
+      if (just_read == 0 || just_read == (uptr)-1) {
+        Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
+        return false;
+      }
+      read_len += just_read;
+      // Empty line marks the end of symbolizer output.
+      if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
+                           buffer[read_len - 2] == '\n') break;
+    }
+    buffer[read_len] = '\0';
+    return true;
+  }
+
+  bool writeToSymbolizer(const char *buffer, uptr length) {
+    if (length == 0)
+      return true;
+    uptr write_len = internal_write(output_fd_, buffer, length);
+    if (write_len == 0 || write_len == (uptr)-1) {
+      Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
+      return false;
+    }
+    return true;
+  }
+
+  const char *path_;
+  int input_fd_;
+  int output_fd_;
+
+  static const uptr kBufferSize = 16 * 1024;
+  char buffer_[kBufferSize];
+
+  static const uptr kMaxTimesRestarted = 5;
+  uptr times_restarted_;
+};
+
+static LowLevelAllocator symbolizer_allocator;  // Linker initialized.
+
+#if SANITIZER_SUPPORTS_WEAK_HOOKS
+extern "C" {
+SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
+bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
+                                char *Buffer, int MaxLength);
+SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
+bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
+                                char *Buffer, int MaxLength);
+}  // extern "C"
+
+class InternalSymbolizer {
+ public:
+  typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
+  static InternalSymbolizer *get() {
+    if (__sanitizer_symbolize_code != 0 &&
+        __sanitizer_symbolize_data != 0) {
+      void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
+      return new(mem) InternalSymbolizer();
+    }
+    return 0;
+  }
+  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
+    SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
+                                                : __sanitizer_symbolize_code;
+    if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
+      return buffer_;
+    return 0;
+  }
+
+ private:
+  InternalSymbolizer() { }
+
+  static const int kBufferSize = 16 * 1024;
+  char buffer_[kBufferSize];
+};
+#else  // SANITIZER_SUPPORTS_WEAK_HOOKS
+
+class InternalSymbolizer {
+ public:
+  static InternalSymbolizer *get() { return 0; }
+  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
+    return 0;
+  }
+};
+
+#endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
+
+class Symbolizer {
+ public:
+  uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
+    if (max_frames == 0)
       return 0;
-    const char *str = buffer_data;
-    uptr frame_id;
-    CHECK_GT(max_frames, 0);
+    LoadedModule *module = FindModuleForAddress(addr);
+    if (module == 0)
+      return 0;
+    const char *module_name = module->full_name();
+    uptr module_offset = addr - module->base_address();
+    const char *str = SendCommand(false, module_name, module_offset);
+    if (str == 0) {
+      // External symbolizer was not initialized or failed. Fill only data
+      // about module name and offset.
+      AddressInfo *info = &frames[0];
+      info->Clear();
+      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
+      return 1;
+    }
+    uptr frame_id = 0;
     for (frame_id = 0; frame_id < max_frames; frame_id++) {
       AddressInfo *info = &frames[frame_id];
       char *function_name = 0;
@@ -160,110 +285,23 @@
     return frame_id;
   }
 
-  bool Restart() {
-    if (times_restarted_ >= kMaxTimesRestarted) return false;
-    times_restarted_++;
-    internal_close(input_fd_);
-    internal_close(output_fd_);
-    return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
-  }
-
- private:
-  bool readFromSymbolizer(char *buffer, uptr max_length) {
-    if (max_length == 0)
-      return true;
-    uptr read_len = 0;
-    while (true) {
-      uptr just_read = internal_read(input_fd_, buffer + read_len,
-                                     max_length - read_len);
-      // We can't read 0 bytes, as we don't expect external symbolizer to close
-      // its stdout.
-      if (just_read == 0 || just_read == (uptr)-1) {
-        Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
-        return false;
-      }
-      read_len += just_read;
-      // Empty line marks the end of symbolizer output.
-      if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
-                           buffer[read_len - 2] == '\n') {
-        break;
-      }
-    }
-    return true;
-  }
-  bool writeToSymbolizer(const char *buffer, uptr length) {
-    if (length == 0)
-      return true;
-    uptr write_len = internal_write(output_fd_, buffer, length);
-    if (write_len == 0 || write_len == (uptr)-1) {
-      Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
-      return false;
-    }
-    return true;
-  }
-
-  const char *path_;
-  int input_fd_;
-  int output_fd_;
-
-  static const uptr kMaxTimesRestarted = 5;
-  uptr times_restarted_;
-};
-
-static LowLevelAllocator symbolizer_allocator;  // Linker initialized.
-
-class Symbolizer {
- public:
-  uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
-    if (max_frames == 0)
-      return 0;
-    LoadedModule *module = FindModuleForAddress(addr);
-    if (module == 0)
-      return 0;
-    const char *module_name = module->full_name();
-    uptr module_offset = addr - module->base_address();
-    uptr actual_frames = 0;
-    if (external_symbolizer_ == 0) {
-      ReportExternalSymbolizerError(
-          "WARNING: Trying to symbolize code, but external "
-          "symbolizer is not initialized!\n");
-    } else {
-      while (true) {
-        actual_frames = external_symbolizer_->SymbolizeCode(
-            addr, module_name, module_offset, frames, max_frames);
-        if (actual_frames > 0) {
-          // Symbolization was successful.
-          break;
-        }
-        // Try to restart symbolizer subprocess. If we don't succeed, forget
-        // about it and don't try to use it later.
-        if (!external_symbolizer_->Restart()) {
-          ReportExternalSymbolizerError(
-              "WARNING: Failed to use and restart external symbolizer!\n");
-          external_symbolizer_ = 0;
-          break;
-        }
-      }
-    }
-    if (external_symbolizer_ == 0) {
-      // External symbolizer was not initialized or failed. Fill only data
-      // about module name and offset.
-      AddressInfo *info = &frames[0];
-      info->Clear();
-      info->FillAddressAndModuleInfo(addr, module_name, module_offset);
-      return 1;
-    }
-    // Otherwise, the data was filled by external symbolizer.
-    return actual_frames;
-  }
-
-  bool SymbolizeData(uptr addr, AddressInfo *frame) {
+  bool SymbolizeData(uptr addr, DataInfo *info) {
     LoadedModule *module = FindModuleForAddress(addr);
     if (module == 0)
       return false;
     const char *module_name = module->full_name();
     uptr module_offset = addr - module->base_address();
-    frame->FillAddressAndModuleInfo(addr, module_name, module_offset);
+    internal_memset(info, 0, sizeof(*info));
+    info->address = addr;
+    info->module = internal_strdup(module_name);
+    info->module_offset = module_offset;
+    const char *str = SendCommand(true, module_name, module_offset);
+    if (str == 0)
+      return true;
+    str = ExtractToken(str, "\n", &info->name);
+    str = ExtractUptr(str, " ", &info->start);
+    str = ExtractUptr(str, "\n", &info->size);
+    info->start += module->base_address();
     return true;
   }
 
@@ -277,14 +315,53 @@
     return true;
   }
 
+  bool IsSymbolizerAvailable() {
+    if (internal_symbolizer_ == 0)
+      internal_symbolizer_ = InternalSymbolizer::get();
+    return internal_symbolizer_ || external_symbolizer_;
+  }
+
  private:
+  char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
+    // First, try to use internal symbolizer.
+    if (!IsSymbolizerAvailable()) {
+      return 0;
+    }
+    if (internal_symbolizer_) {
+      return internal_symbolizer_->SendCommand(is_data, module_name,
+                                               module_offset);
+    }
+    // Otherwise, fall back to external symbolizer.
+    if (external_symbolizer_ == 0) {
+      ReportExternalSymbolizerError(
+          "WARNING: Trying to symbolize code, but external "
+          "symbolizer is not initialized!\n");
+      return 0;
+    }
+    for (;;) {
+      char *reply = external_symbolizer_->SendCommand(is_data, module_name,
+          module_offset);
+      if (reply)
+        return reply;
+      // Try to restart symbolizer subprocess. If we don't succeed, forget
+      // about it and don't try to use it later.
+      if (!external_symbolizer_->Restart()) {
+        ReportExternalSymbolizerError(
+            "WARNING: Failed to use and restart external symbolizer!\n");
+        external_symbolizer_ = 0;
+        return 0;
+      }
+    }
+  }
+
   LoadedModule *FindModuleForAddress(uptr address) {
     if (modules_ == 0) {
       modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
           kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
       CHECK(modules_);
       n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts);
-      CHECK_GT(n_modules_, 0);
+      // FIXME: Return this check when GetListOfModules is implemented on Mac.
+      // CHECK_GT(n_modules_, 0);
       CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
     }
     for (uptr i = 0; i < n_modules_; i++) {
@@ -310,6 +387,7 @@
   uptr n_modules_;
 
   ExternalSymbolizer *external_symbolizer_;  // Leaked.
+  InternalSymbolizer *internal_symbolizer_;  // Leaked.
 };
 
 static Symbolizer symbolizer;  // Linker initialized.
@@ -318,12 +396,16 @@
   return symbolizer.SymbolizeCode(address, frames, max_frames);
 }
 
-bool SymbolizeData(uptr address, AddressInfo *frame) {
-  return symbolizer.SymbolizeData(address, frame);
+bool SymbolizeData(uptr address, DataInfo *info) {
+  return symbolizer.SymbolizeData(address, info);
 }
 
 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
   return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
 }
 
+bool IsSymbolizerAvailable() {
+  return symbolizer.IsSymbolizerAvailable();
+}
+
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.h b/lib/sanitizer_common/sanitizer_symbolizer.h
index 196e108..dd2037e 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -53,12 +53,26 @@
   }
 };
 
+struct DataInfo {
+  uptr address;        // Address that was looked up.
+  char *module;        // Copy of the containing module's full name.
+  uptr module_offset;  // |address| minus the module's base address.
+  char *name;          // First line of the symbolizer's reply, if any.
+  uptr start;          // Symbolizer-reported start, as an absolute address.
+  uptr size;           // Size value parsed from the symbolizer's reply.
+};
+
 // Fills at most "max_frames" elements of "frames" with descriptions
 // for a given address (in all inlined functions). Returns the number
 // of descriptions actually filled.
 // This function should NOT be called from two threads simultaneously.
 uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames);
-bool SymbolizeData(uptr address, AddressInfo *frame);
+bool SymbolizeData(uptr address, DataInfo *info);
+
+bool IsSymbolizerAvailable();
+
+// Attempts to demangle the provided C++ mangled name.
+const char *Demangle(const char *Name);
 
 // Starts external symbolizer program in a subprocess. Sanitizer communicates
 // with external symbolizer via pipes.
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_itanium.cc b/lib/sanitizer_common/sanitizer_symbolizer_itanium.cc
new file mode 100644
index 0000000..4386294
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_symbolizer_itanium.cc
@@ -0,0 +1,42 @@
+//===-- sanitizer_symbolizer_itanium.cc -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between the sanitizer run-time libraries.
+// Itanium C++ ABI-specific implementation of symbolizer parts.
+//===----------------------------------------------------------------------===//
+#if defined(__APPLE__) || defined(__linux__)
+
+#include "sanitizer_symbolizer.h"
+
+#include <stdlib.h>
+
+// C++ demangling function, as required by Itanium C++ ABI. This is weak,
+// because we do not require a C++ ABI library to be linked to a program
+// using sanitizers; if it's not present, we'll just use the mangled name.
+namespace __cxxabiv1 {
+  extern "C" char *__cxa_demangle(const char *mangled, char *buffer,
+                                  size_t *length, int *status)
+    SANITIZER_WEAK_ATTRIBUTE;
+}
+
+const char *__sanitizer::Demangle(const char *MangledName) {
+  // FIXME: __cxa_demangle aggressively insists on allocating memory.
+  // There's not much we can do about that, short of providing our
+  // own demangler (libc++abi's implementation could be adapted so that
+  // it does not allocate). For now, we just call it anyway, and we leak
+  // the returned value.
+  if (__cxxabiv1::__cxa_demangle)
+    if (const char *Demangled =
+          __cxxabiv1::__cxa_demangle(MangledName, 0, 0, 0))
+      return Demangled;
+
+  return MangledName;
+}
+
+#endif  // __APPLE__ || __linux__
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_linux.cc b/lib/sanitizer_common/sanitizer_symbolizer_linux.cc
index 4bd3dc8..c068839 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_linux.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_linux.cc
@@ -99,7 +99,7 @@
     for (int fd = getdtablesize(); fd > 2; fd--)
       internal_close(fd);
     execl(path_to_symbolizer, path_to_symbolizer, (char*)0);
-    Exit(1);
+    internal__exit(1);
   }
 
   // Continue execution in parent process.
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_mac.cc b/lib/sanitizer_common/sanitizer_symbolizer_mac.cc
index 2399360..cd0d004 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_mac.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_mac.cc
@@ -23,7 +23,8 @@
 }
 
 uptr GetListOfModules(LoadedModule *modules, uptr max_modules) {
-  UNIMPLEMENTED();
+  // FIXME: Actually implement this on Mac.
+  return 0;
 }
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_win.cc b/lib/sanitizer_common/sanitizer_symbolizer_win.cc
index ad6c303..f1b6a02 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_win.cc
@@ -28,6 +28,10 @@
   UNIMPLEMENTED();
 };
 
+const char *Demangle(const char *MangledName) {
+  return MangledName;
+}
+
 }  // namespace __sanitizer
 
 #endif  // _WIN32
diff --git a/lib/sanitizer_common/sanitizer_win.cc b/lib/sanitizer_common/sanitizer_win.cc
index 49a4e8b..7b540a2 100644
--- a/lib/sanitizer_common/sanitizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_win.cc
@@ -15,10 +15,13 @@
 #define WIN32_LEAN_AND_MEAN
 #define NOGDI
 #include <stdlib.h>
+#include <io.h>
 #include <windows.h>
 
 #include "sanitizer_common.h"
 #include "sanitizer_libc.h"
+#include "sanitizer_placement_new.h"
+#include "sanitizer_mutex.h"
 
 namespace __sanitizer {
 
@@ -75,6 +78,8 @@
 }
 
 void *MmapFixedNoReserve(uptr fixed_addr, uptr size) {
+  // FIXME: is this really "NoReserve"? On Win32 this does not matter much,
+  // but on Win64 it does.
   void *p = VirtualAlloc((LPVOID)fixed_addr, size,
       MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
   if (p == 0)
@@ -83,11 +88,20 @@
   return p;
 }
 
+void *MmapFixedOrDie(uptr fixed_addr, uptr size) {
+  return MmapFixedNoReserve(fixed_addr, size);
+}
+
 void *Mprotect(uptr fixed_addr, uptr size) {
   return VirtualAlloc((LPVOID)fixed_addr, size,
                       MEM_RESERVE | MEM_COMMIT, PAGE_NOACCESS);
 }
 
+void FlushUnneededShadowMemory(uptr addr, uptr size) {
+  // This is almost useless on 32-bits.
+  // FIXME: add madvise-analog when we move to 64-bits.
+}
+
 bool MemoryRangeIsAvailable(uptr range_start, uptr range_end) {
   // FIXME: shall we do anything here on Windows?
   return true;
@@ -117,6 +131,10 @@
   UNIMPLEMENTED();
 }
 
+u32 GetUid() {
+  UNIMPLEMENTED();
+}
+
 void DumpProcessMap() {
   UNIMPLEMENTED();
 }
@@ -149,10 +167,6 @@
   Sleep(millis);
 }
 
-void Exit(int exitcode) {
-  _exit(exitcode);
-}
-
 void Abort() {
   abort();
   _exit(-1);  // abort is not NORETURN on Windows.
@@ -179,10 +193,18 @@
 }
 
 int internal_isatty(fd_t fd) {
+  return _isatty(fd);
+}
+
+fd_t internal_open(const char *filename, int flags) {
   UNIMPLEMENTED();
 }
 
-fd_t internal_open(const char *filename, bool write) {
+fd_t internal_open(const char *filename, int flags, u32 mode) {
+  UNIMPLEMENTED();
+}
+
+fd_t OpenFile(const char *filename, bool write) {
   UNIMPLEMENTED();
 }
 
@@ -202,6 +224,18 @@
   return ret;
 }
 
+int internal_stat(const char *path, void *buf) {
+  UNIMPLEMENTED();
+}
+
+int internal_lstat(const char *path, void *buf) {
+  UNIMPLEMENTED();
+}
+
+int internal_fstat(fd_t fd, void *buf) {
+  UNIMPLEMENTED();
+}
+
 uptr internal_filesize(fd_t fd) {
   UNIMPLEMENTED();
 }
@@ -219,6 +253,48 @@
   return 0;
 }
 
+void internal__exit(int exitcode) {
+  _exit(exitcode);
+}
+
+// ---------------------- BlockingMutex ---------------- {{{1
+const uptr LOCK_UNINITIALIZED = 0;  // owner_ before the constructor has run.
+const uptr LOCK_READY = (uptr)-1;  // owner_ when initialized and not held.
+
+BlockingMutex::BlockingMutex(LinkerInitialized li) {
+  // FIXME: see comments in BlockingMutex::Lock() for the details.
+  CHECK(li == LINKER_INITIALIZED || owner_ == LOCK_UNINITIALIZED);
+  // The CRITICAL_SECTION lives inside opaque_storage_, so it must fit there.
+  CHECK(sizeof(CRITICAL_SECTION) <= sizeof(opaque_storage_));
+  InitializeCriticalSection((LPCRITICAL_SECTION)opaque_storage_);
+  owner_ = LOCK_READY;
+}
+
+void BlockingMutex::Lock() {
+  if (owner_ == LOCK_UNINITIALIZED) {
+    // FIXME: global BlockingMutex objects apparently get here without their
+    // constructor having run (maybe a side effect of the clang+cl+link
+    // Frankenbuild), so construct the mutex in place on first use.
+    new(this) BlockingMutex((LinkerInitialized)(LINKER_INITIALIZED + 1));
+    // FIXME: If it turns out the linker doesn't invoke our
+    // constructors, we should probably manually Lock/Unlock all the global
+    // locks while we're starting in one thread to avoid double-init races.
+  }
+  EnterCriticalSection((LPCRITICAL_SECTION)opaque_storage_);
+  CHECK_EQ(owner_, LOCK_READY);  // Catches re-locking by the owning thread.
+  owner_ = GetThreadSelf();
+}
+
+void BlockingMutex::Unlock() {
+  CHECK_EQ(owner_, GetThreadSelf());  // Only the holder may unlock.
+  owner_ = LOCK_READY;  // Reset while the critical section is still held.
+  LeaveCriticalSection((LPCRITICAL_SECTION)opaque_storage_);
+}
+
+void BlockingMutex::CheckLocked() {
+  CHECK_EQ(owner_, GetThreadSelf());  // Holder must be the calling thread.
+}
+
 }  // namespace __sanitizer
 
 #endif  // _WIN32
diff --git a/lib/sanitizer_common/scripts/check_lint.sh b/lib/sanitizer_common/scripts/check_lint.sh
index ca0cafa..8596629 100755
--- a/lib/sanitizer_common/scripts/check_lint.sh
+++ b/lib/sanitizer_common/scripts/check_lint.sh
@@ -13,24 +13,31 @@
 # Cpplint setup
 cd ${SCRIPT_DIR}
 if [ ! -d cpplint ]; then
-  svn co -r83 http://google-styleguide.googlecode.com/svn/trunk/cpplint cpplint
+  svn co http://google-styleguide.googlecode.com/svn/trunk/cpplint cpplint
+else
+  (cd cpplint && svn up)
 fi
 CPPLINT=${SCRIPT_DIR}/cpplint/cpplint.py
 
 # Filters
 # TODO: remove some of these filters
-ASAN_RTL_LINT_FILTER=-readability/casting,-readability/check,-build/include,-build/header_guard,-build/class,-legal/copyright,-build/namespaces
-ASAN_TEST_LINT_FILTER=-readability/casting,-build/include,-legal/copyright,-whitespace/newline,-runtime/sizeof,-runtime/int,-runtime/printf,-build/header_guard
-TSAN_RTL_LINT_FILTER=-legal/copyright,-build/include,-readability/casting,-build/header_guard,-build/namespaces
+COMMON_LINT_FILTER=-build/include,-build/header_guard,-legal/copyright,-whitespace/comments,-readability/casting,\
+-build/namespaces
+ASAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER},-readability/check,-runtime/int
+ASAN_TEST_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/sizeof,-runtime/int,-runtime/printf
+ASAN_LIT_TEST_LINT_FILTER=${ASAN_TEST_LINT_FILTER},-whitespace/line_length
+TSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER}
 TSAN_TEST_LINT_FILTER=${TSAN_RTL_LINT_FILTER},-runtime/threadsafe_fn,-runtime/int
+TSAN_LIT_TEST_LINT_FILTER=${TSAN_TEST_LINT_FILTER},-whitespace/line_length
+MSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER}
+COMMON_RTL_INC_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int,-runtime/sizeof,-runtime/printf
 
 cd ${LLVM_CHECKOUT}
 
 # LLVM Instrumentation
 LLVM_INSTRUMENTATION=lib/Transforms/Instrumentation
 LLVM_LINT_FILTER=-,+whitespace
-${CPPLINT} --filter=${LLVM_LINT_FILTER} ${LLVM_INSTRUMENTATION}/AddressSanitizer.cpp \
-                                        ${LLVM_INSTRUMENTATION}/ThreadSanitizer.cpp \
+${CPPLINT} --filter=${LLVM_LINT_FILTER} ${LLVM_INSTRUMENTATION}/*Sanitizer.cpp \
                                         ${LLVM_INSTRUMENTATION}/BlackList.*
 
 COMPILER_RT=projects/compiler-rt
@@ -41,10 +48,10 @@
 
 # Sanitizer_common
 COMMON_RTL=${COMPILER_RT}/lib/sanitizer_common
-${CPPLINT} --filter=${ASAN_RTL_LINT_FILTER} ${COMMON_RTL}/*.{cc,h}
-${CPPLINT} --filter=${TSAN_RTL_LINT_FILTER} ${COMMON_RTL}/tests/*.cc
+${CPPLINT} --filter=${COMMON_RTL_INC_LINT_FILTER} ${COMMON_RTL}/*.{cc,h}
+${CPPLINT} --filter=${COMMON_RTL_INC_LINT_FILTER} ${COMMON_RTL}/tests/*.cc
 
-#Interception
+# Interception
 INTERCEPTION=${COMPILER_RT}/lib/interception
 ${CPPLINT} --filter=${ASAN_RTL_LINT_FILTER} ${INTERCEPTION}/*.{cc,h}
 
@@ -52,12 +59,28 @@
 ASAN_RTL=${COMPILER_RT}/lib/asan
 ${CPPLINT} --filter=${ASAN_RTL_LINT_FILTER} ${ASAN_RTL}/*.{cc,h}
 ${CPPLINT} --filter=${ASAN_TEST_LINT_FILTER} ${ASAN_RTL}/tests/*.{cc,h}
-${CPPLINT} --filter=${ASAN_TEST_LINT_FILTER} ${ASAN_RTL}/lit_tests/*.cc \
+${CPPLINT} --filter=${ASAN_LIT_TEST_LINT_FILTER} ${ASAN_RTL}/lit_tests/*.cc \
                                              ${ASAN_RTL}/lit_tests/*/*.cc \
 
 # TSan
 TSAN_RTL=${COMPILER_RT}/lib/tsan
 ${CPPLINT} --filter=${TSAN_RTL_LINT_FILTER} ${TSAN_RTL}/rtl/*.{cc,h}
 ${CPPLINT} --filter=${TSAN_TEST_LINT_FILTER} ${TSAN_RTL}/tests/rtl/*.{cc,h} \
-                                             ${TSAN_RTL}/tests/unit/*.cc \
-                                             ${TSAN_RTL}/lit_tests/*.cc
+                                             ${TSAN_RTL}/tests/unit/*.cc
+${CPPLINT} --filter=${TSAN_LIT_TEST_LINT_FILTER} ${TSAN_RTL}/lit_tests/*.cc
+
+# MSan
+MSAN_RTL=${COMPILER_RT}/lib/msan
+${CPPLINT} --filter=${MSAN_RTL_LINT_FILTER} ${MSAN_RTL}/*.{cc,h}
+
+set +e
+
+# Misc files: cpplint is run on a temporary .cc copy of each .inc file.
+FILES=${COMMON_RTL}/*.inc
+for FILE in $FILES; do
+    TMPFILE="$(mktemp -u "${FILE}.XXXXX").cc"
+    echo "Checking $FILE"
+    cp -f "$FILE" "$TMPFILE" && \
+        ${CPPLINT} --filter=${COMMON_RTL_INC_LINT_FILTER} "$TMPFILE"
+    rm -f "$TMPFILE"  # -f: the copy may not exist if cp failed.
+done
diff --git a/lib/sanitizer_common/tests/CMakeLists.txt b/lib/sanitizer_common/tests/CMakeLists.txt
index 3baa08b..5b414b2 100644
--- a/lib/sanitizer_common/tests/CMakeLists.txt
+++ b/lib/sanitizer_common/tests/CMakeLists.txt
@@ -1,36 +1,139 @@
+include(CompilerRTCompile)
+
 set(SANITIZER_UNITTESTS
   sanitizer_allocator_test.cc
   sanitizer_common_test.cc
   sanitizer_flags_test.cc
   sanitizer_libc_test.cc
+  sanitizer_linux_test.cc
   sanitizer_list_test.cc
+  sanitizer_mutex_test.cc
   sanitizer_printf_test.cc
+  sanitizer_scanf_interceptor_test.cc
   sanitizer_stackdepot_test.cc
+  sanitizer_stacktrace_test.cc
   sanitizer_test_main.cc
   )
 
+set(SANITIZER_TEST_HEADERS)
+foreach(header ${SANITIZER_HEADERS})
+  list(APPEND SANITIZER_TEST_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../${header})
+endforeach()
+
 include_directories(..)
 include_directories(../..)
 
-# Unittest target.
+# Adds static library <library> built from the object files given in ARGN
+# (universal binary on Mac and arch-specific object files on Linux).
+function(add_sanitizer_common_lib library)
+  add_library(${library} STATIC ${ARGN})
+  set_target_properties(${library} PROPERTIES
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endfunction()
+
+# Returns the test library target name in <lib> and its archive in <lib_name>.
+function(get_sanitizer_common_lib_for_arch arch lib lib_name)
+  set(target_suffix "${arch}")
+  if(APPLE)
+    set(target_suffix "osx")
+  endif()
+  set(${lib} "RTSanitizerCommon.test.${target_suffix}" PARENT_SCOPE)
+  set(${lib_name} "libRTSanitizerCommon.test.${target_suffix}.a" PARENT_SCOPE)
+endfunction()
+
+# Sanitizer_common unit tests testsuite.
 add_custom_target(SanitizerUnitTests)
 set_target_properties(SanitizerUnitTests PROPERTIES
   FOLDER "Sanitizer unittests")
-add_unittest(SanitizerUnitTests SanitizerUnitTest ${SANITIZER_UNITTESTS})
-# Link with sanitizer runtime.
-target_link_libraries(SanitizerUnitTest RTSanitizerCommon.test)
-# Build unit tests with debug info.
-set_property(TARGET SanitizerUnitTest APPEND_STRING
-  PROPERTY COMPILE_FLAGS " -g -Werror")
 
-# Run unittests as a part of lit testsuite.
-configure_lit_site_cfg(
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
-  )
+# Adds Sanitizer-<arch>-Test; as a macro, its set() calls leak to the caller.
+macro(add_sanitizer_tests_for_arch arch)
+  get_target_flags_for_arch(${arch} TARGET_FLAGS)
+  set(SANITIZER_TEST_SOURCES ${SANITIZER_UNITTESTS}
+                             ${COMPILER_RT_GTEST_SOURCE})
+  set(SANITIZER_TEST_CFLAGS ${COMPILER_RT_GTEST_INCLUDE_CFLAGS}
+                            -I${COMPILER_RT_SOURCE_DIR}/include
+                            -I${COMPILER_RT_SOURCE_DIR}/lib
+                            -I${COMPILER_RT_SOURCE_DIR}/lib/sanitizer_common
+                            -O2 -g -Wall -Werror ${TARGET_FLAGS})
+  set(SANITIZER_TEST_LINK_FLAGS -lstdc++ -lpthread ${TARGET_FLAGS})
+  set(SANITIZER_TEST_OBJECTS)  # Start from an empty object list.
+  foreach(source ${SANITIZER_TEST_SOURCES})
+    get_filename_component(basename ${source} NAME)
+    set(output_obj "${basename}.${arch}.o")  # e.g. foo_test.cc.x86_64.o
+    clang_compile(${output_obj} ${source}
+                  CFLAGS ${SANITIZER_TEST_CFLAGS}
+                  DEPS gtest ${SANITIZER_RUNTIME_LIBRARIES}
+                       ${SANITIZER_TEST_HEADERS})
+    list(APPEND SANITIZER_TEST_OBJECTS ${output_obj})
+  endforeach()
+  get_sanitizer_common_lib_for_arch(${arch} SANITIZER_COMMON_LIB
+                                    SANITIZER_COMMON_LIB_NAME)
+  # Add unittest target from the objects plus the sanitizer_common archive.
+  set(SANITIZER_TEST_NAME "Sanitizer-${arch}-Test")
+  add_compiler_rt_test(SanitizerUnitTests ${SANITIZER_TEST_NAME}
+                       OBJECTS ${SANITIZER_TEST_OBJECTS}
+                               ${SANITIZER_COMMON_LIB_NAME}
+                       DEPS ${SANITIZER_TEST_OBJECTS} ${SANITIZER_COMMON_LIB}
+                       LINK_FLAGS ${SANITIZER_TEST_LINK_FLAGS})
+endmacro()
 
-add_lit_testsuite(check-sanitizer "Running sanitizer library unittests"
-  ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS SanitizerUnitTests
-  )
-set_target_properties(check-sanitizer PROPERTIES FOLDER "Sanitizer unittests")
+if(COMPILER_RT_CAN_EXECUTE_TESTS)
+  # We use just-built clang to build sanitizer_common unittests, so we must
+  # be sure that produced binaries would work.
+  if(APPLE)
+    add_sanitizer_common_lib("RTSanitizerCommon.test.osx"
+                             $<TARGET_OBJECTS:RTSanitizerCommon.osx>)
+  else()
+    if(CAN_TARGET_x86_64)
+      add_sanitizer_common_lib("RTSanitizerCommon.test.x86_64"
+                               $<TARGET_OBJECTS:RTSanitizerCommon.x86_64>)
+    endif()
+    if(CAN_TARGET_i386)
+      add_sanitizer_common_lib("RTSanitizerCommon.test.i386"
+                               $<TARGET_OBJECTS:RTSanitizerCommon.i386>)
+    endif()
+  endif()
+  if(CAN_TARGET_x86_64)
+    add_sanitizer_tests_for_arch(x86_64)
+  endif()
+  if(CAN_TARGET_i386)
+    add_sanitizer_tests_for_arch(i386)
+  endif()
+
+  # Run unittests as a part of lit testsuite.
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+    )
+
+  add_lit_testsuite(check-sanitizer "Running sanitizer library unittests"
+    ${CMAKE_CURRENT_BINARY_DIR}
+    DEPENDS SanitizerUnitTests
+    )
+  set_target_properties(check-sanitizer PROPERTIES FOLDER "Sanitizer unittests")
+endif()
+
+if(ANDROID)
+  # Assumes Android unit tests are built in a tree with fresh Clang as host.
+  add_executable(SanitizerTest
+    ${SANITIZER_UNITTESTS}
+    ${COMPILER_RT_GTEST_SOURCE}
+    $<TARGET_OBJECTS:RTSanitizerCommon.arm.android>
+    )
+  set_target_compile_flags(SanitizerTest
+    ${SANITIZER_COMMON_CFLAGS}
+    ${COMPILER_RT_GTEST_INCLUDE_CFLAGS}
+    -I${COMPILER_RT_SOURCE_DIR}/include
+    -I${COMPILER_RT_SOURCE_DIR}/lib
+    -I${COMPILER_RT_SOURCE_DIR}/lib/sanitizer_common
+    -O2 -g
+    )
+  # Set up the output directory and link flags.
+  set_target_properties(SanitizerTest PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+  # NOTE(review): confirm SANITIZER_TEST_LINK_FLAGS is defined on this path.
+  set_target_link_flags(SanitizerTest ${SANITIZER_TEST_LINK_FLAGS})
+  # Add unit test to test suite.
+  add_dependencies(SanitizerUnitTests SanitizerTest)
+endif()
diff --git a/lib/sanitizer_common/tests/lit.site.cfg.in b/lib/sanitizer_common/tests/lit.site.cfg.in
index bb9a28d..ad0ff3c 100644
--- a/lib/sanitizer_common/tests/lit.site.cfg.in
+++ b/lib/sanitizer_common/tests/lit.site.cfg.in
@@ -1,9 +1,15 @@
 ## Autogenerated by LLVM/Clang configuration.
 # Do not edit!
 
-config.build_type = "@CMAKE_BUILD_TYPE@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
+config.llvm_build_mode = "@LLVM_BUILD_MODE@"
+
+try:
+  config.llvm_build_mode = config.llvm_build_mode % lit.params
+except KeyError,e:
+  key, = e.args
+  lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key, key))
 
 # Let the main config do the real work.
 lit.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator64_testlib.cc b/lib/sanitizer_common/tests/sanitizer_allocator64_testlib.cc
deleted file mode 100644
index b41f808..0000000
--- a/lib/sanitizer_common/tests/sanitizer_allocator64_testlib.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-//===-- sanitizer_allocator64_testlib.cc ----------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Malloc replacement library based on CombinedAllocator.
-// The primary purpose of this file is an end-to-end integration test
-// for CombinedAllocator.
-//===----------------------------------------------------------------------===//
-#include "sanitizer_common/sanitizer_allocator.h"
-#include <stddef.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <assert.h>
-
-namespace {
-static const uptr kAllocatorSpace = 0x600000000000ULL;
-static const uptr kAllocatorSize = 0x10000000000;  // 1T.
-
-typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 16,
-  DefaultSizeClassMap> PrimaryAllocator;
-typedef SizeClassAllocatorLocalCache<PrimaryAllocator::kNumClasses,
-  PrimaryAllocator> AllocatorCache;
-typedef LargeMmapAllocator SecondaryAllocator;
-typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
-          SecondaryAllocator> Allocator;
-
-static THREADLOCAL AllocatorCache cache;
-static Allocator allocator;
-
-static int inited = 0;
-
-__attribute__((constructor))
-void Init() {
-  if (inited) return;
-  inited = true;  // this must happen before any threads are created.
-  allocator.Init();
-}
-
-}  // namespace
-
-#if 1
-extern "C" {
-void *malloc(size_t size) {
-  Init();
-  assert(inited);
-  return allocator.Allocate(&cache, size, 8);
-}
-
-void free(void *p) {
-  assert(inited);
-  allocator.Deallocate(&cache, p);
-}
-
-void *calloc(size_t nmemb, size_t size) {
-  assert(inited);
-  return allocator.Allocate(&cache, nmemb * size, 8, /*cleared=*/true);
-}
-
-void *realloc(void *p, size_t new_size) {
-  assert(inited);
-  return allocator.Reallocate(&cache, p, new_size, 8);
-}
-
-void *memalign() { assert(0); }
-
-int posix_memalign(void **memptr, size_t alignment, size_t size) {
-  *memptr = allocator.Allocate(&cache, size, alignment);
-  CHECK_EQ(((uptr)*memptr & (alignment - 1)), 0);
-  return 0;
-}
-
-void *valloc(size_t size) {
-  assert(inited);
-  return allocator.Allocate(&cache, size, GetPageSizeCached());
-}
-
-void *pvalloc(size_t size) {
-  assert(inited);
-  if (size == 0) size = GetPageSizeCached();
-  return allocator.Allocate(&cache, size, GetPageSizeCached());
-}
-}
-#endif
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
index e20b1ca..40cf8a5 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
@@ -14,12 +14,18 @@
 #include "sanitizer_common/sanitizer_allocator.h"
 #include "sanitizer_common/sanitizer_common.h"
 
+#include "sanitizer_test_utils.h"
+
 #include "gtest/gtest.h"
 
 #include <stdlib.h>
+#include <pthread.h>
 #include <algorithm>
 #include <vector>
 
+// Too slow for debug build
+#if TSAN_DEBUG == 0
+
 #if SANITIZER_WORDSIZE == 64
 static const uptr kAllocatorSpace = 0x700000000000ULL;
 static const uptr kAllocatorSize  = 0x010000000000ULL;  // 1T.
@@ -40,31 +46,8 @@
 template <class SizeClassMap>
 void TestSizeClassMap() {
   typedef SizeClassMap SCMap;
-#if 0
-  for (uptr i = 0; i < SCMap::kNumClasses; i++) {
-    printf("c%ld => %ld (%lx) cached=%ld(%ld)\n",
-        i, SCMap::Size(i), SCMap::Size(i), SCMap::MaxCached(i) * SCMap::Size(i),
-        SCMap::MaxCached(i));
-  }
-#endif
-  for (uptr c = 0; c < SCMap::kNumClasses; c++) {
-    uptr s = SCMap::Size(c);
-    CHECK_EQ(SCMap::ClassID(s), c);
-    if (c != SCMap::kNumClasses - 1)
-      CHECK_EQ(SCMap::ClassID(s + 1), c + 1);
-    CHECK_EQ(SCMap::ClassID(s - 1), c);
-    if (c)
-      CHECK_GT(SCMap::Size(c), SCMap::Size(c-1));
-  }
-  CHECK_EQ(SCMap::ClassID(SCMap::kMaxSize + 1), 0);
-
-  for (uptr s = 1; s <= SCMap::kMaxSize; s++) {
-    uptr c = SCMap::ClassID(s);
-    CHECK_LT(c, SCMap::kNumClasses);
-    CHECK_GE(SCMap::Size(c), s);
-    if (c > 0)
-      CHECK_LT(SCMap::Size(c-1), s);
-  }
+  // SCMap::Print();
+  SCMap::Validate();
 }
 
 TEST(SanitizerCommon, DefaultSizeClassMap) {
@@ -79,26 +62,36 @@
 void TestSizeClassAllocator() {
   Allocator *a = new Allocator;
   a->Init();
+  SizeClassAllocatorLocalCache<Allocator> cache;
+  memset(&cache, 0, sizeof(cache));
+  cache.Init(0);
 
   static const uptr sizes[] = {1, 16, 30, 40, 100, 1000, 10000,
-    50000, 60000, 100000, 300000, 500000, 1000000, 2000000};
+    50000, 60000, 100000, 120000, 300000, 500000, 1000000, 2000000};
 
   std::vector<void *> allocated;
 
   uptr last_total_allocated = 0;
-  for (int i = 0; i < 5; i++) {
+  for (int i = 0; i < 3; i++) {
     // Allocate a bunch of chunks.
     for (uptr s = 0; s < ARRAY_SIZE(sizes); s++) {
       uptr size = sizes[s];
       if (!a->CanAllocate(size, 1)) continue;
       // printf("s = %ld\n", size);
-      uptr n_iter = std::max((uptr)2, 1000000 / size);
+      uptr n_iter = std::max((uptr)6, 8000000 / size);
+      // fprintf(stderr, "size: %ld iter: %ld\n", size, n_iter);
       for (uptr i = 0; i < n_iter; i++) {
-        void *x = a->Allocate(size, 1);
+        uptr class_id0 = Allocator::SizeClassMapT::ClassID(size);
+        char *x = (char*)cache.Allocate(a, class_id0);
+        x[0] = 0;
+        x[size - 1] = 0;
+        x[size / 2] = 0;
         allocated.push_back(x);
         CHECK_EQ(x, a->GetBlockBegin(x));
-        CHECK_EQ(x, a->GetBlockBegin((char*)x + size - 1));
+        CHECK_EQ(x, a->GetBlockBegin(x + size - 1));
         CHECK(a->PointerIsMine(x));
+        CHECK(a->PointerIsMine(x + size - 1));
+        CHECK(a->PointerIsMine(x + size / 2));
         CHECK_GE(a->GetActuallyAllocatedSize(x), size);
         uptr class_id = a->GetSizeClass(x);
         CHECK_EQ(class_id, Allocator::SizeClassMapT::ClassID(size));
@@ -113,7 +106,7 @@
       uptr *metadata = reinterpret_cast<uptr*>(a->GetMetaData(x));
       CHECK_EQ(metadata[0], reinterpret_cast<uptr>(x) + 1);
       CHECK_EQ(metadata[1], 0xABCD);
-      a->Deallocate(x);
+      cache.Deallocate(a, a->GetSizeClass(x), x);
     }
     allocated.clear();
     uptr total_allocated = a->TotalMemoryUsed();
@@ -122,6 +115,12 @@
     CHECK_EQ(last_total_allocated, total_allocated);
   }
 
+  // Check that GetBlockBegin never crashes.
+  for (uptr x = 0, step = kAddressSpaceSize / 100000;
+       x < kAddressSpaceSize - step; x += step)
+    if (a->PointerIsMine(reinterpret_cast<void *>(x)))
+      Ident(a->GetBlockBegin(reinterpret_cast<void *>(x)));
+
   a->TestOnlyUnmap();
   delete a;
 }
@@ -144,13 +143,15 @@
 void SizeClassAllocatorMetadataStress() {
   Allocator *a = new Allocator;
   a->Init();
+  SizeClassAllocatorLocalCache<Allocator> cache;
+  memset(&cache, 0, sizeof(cache));
+  cache.Init(0);
   static volatile void *sink;
 
   const uptr kNumAllocs = 10000;
   void *allocated[kNumAllocs];
   for (uptr i = 0; i < kNumAllocs; i++) {
-    uptr size = (i % 4096) + 1;
-    void *x = a->Allocate(size, 1);
+    void *x = cache.Allocate(a, 1 + i % 50);
     allocated[i] = x;
   }
   // Get Metadata kNumAllocs^2 times.
@@ -158,7 +159,7 @@
     sink = a->GetMetaData(allocated[i % kNumAllocs]);
   }
   for (uptr i = 0; i < kNumAllocs; i++) {
-    a->Deallocate(allocated[i]);
+    cache.Deallocate(a, 1 + i % 50, allocated[i]);
   }
 
   a->TestOnlyUnmap();
@@ -179,13 +180,85 @@
   SizeClassAllocatorMetadataStress<Allocator32Compact>();
 }
 
+struct TestMapUnmapCallback {  // Counts OnMap/OnUnmap notifications.
+  static int map_count, unmap_count;
+  void OnMap(uptr, uptr) const { ++map_count; }
+  void OnUnmap(uptr, uptr) const { ++unmap_count; }
+};
+int TestMapUnmapCallback::map_count = 0;
+int TestMapUnmapCallback::unmap_count = 0;
+
+#if SANITIZER_WORDSIZE == 64
+TEST(SanitizerCommon, SizeClassAllocator64MapUnmapCallback) {
+  TestMapUnmapCallback::map_count = 0;
+  TestMapUnmapCallback::unmap_count = 0;
+  typedef SizeClassAllocator64<
+      kAllocatorSpace, kAllocatorSize, 16, DefaultSizeClassMap,
+      TestMapUnmapCallback> Allocator64WithCallBack;
+  Allocator64WithCallBack *a = new Allocator64WithCallBack;
+  a->Init();
+  EXPECT_EQ(TestMapUnmapCallback::map_count, 1);  // Allocator state.
+  SizeClassAllocatorLocalCache<Allocator64WithCallBack> cache;
+  memset(&cache, 0, sizeof(cache));
+  cache.Init(0);
+  AllocatorStats stats;
+  stats.Init();
+  a->AllocateBatch(&stats, &cache, 32);
+  EXPECT_EQ(TestMapUnmapCallback::map_count, 3);  // State + alloc + metadata.
+  a->TestOnlyUnmap();
+  EXPECT_EQ(TestMapUnmapCallback::unmap_count, 1);  // The whole thing.
+  delete a;
+}
+#endif
+
+TEST(SanitizerCommon, SizeClassAllocator32MapUnmapCallback) {
+  TestMapUnmapCallback::map_count = 0;
+  TestMapUnmapCallback::unmap_count = 0;
+  typedef SizeClassAllocator32<
+      0, kAddressSpaceSize, 16, CompactSizeClassMap,
+      TestMapUnmapCallback> Allocator32WithCallBack;
+  Allocator32WithCallBack *a = new Allocator32WithCallBack;
+  a->Init();
+  EXPECT_EQ(TestMapUnmapCallback::map_count, 1);  // Allocator state.
+  SizeClassAllocatorLocalCache<Allocator32WithCallBack>  cache;
+  memset(&cache, 0, sizeof(cache));
+  cache.Init(0);
+  AllocatorStats stats;
+  stats.Init();
+  a->AllocateBatch(&stats, &cache, 32);
+  EXPECT_EQ(TestMapUnmapCallback::map_count, 2);  // alloc.
+  a->TestOnlyUnmap();
+  EXPECT_EQ(TestMapUnmapCallback::unmap_count, 2);  // The whole thing + alloc.
+  delete a;
+  // fprintf(stderr, "Map: %d Unmap: %d\n",
+  //         TestMapUnmapCallback::map_count,
+  //         TestMapUnmapCallback::unmap_count);
+}
+
+TEST(SanitizerCommon, LargeMmapAllocatorMapUnmapCallback) {
+  // One large allocation should report one map, and freeing it one unmap.
+  TestMapUnmapCallback::map_count = TestMapUnmapCallback::unmap_count = 0;
+  LargeMmapAllocator<TestMapUnmapCallback> allocator;
+  allocator.Init();
+  AllocatorStats stats;
+  stats.Init();
+  void *chunk = allocator.Allocate(&stats, 1 << 20, 1);
+  EXPECT_EQ(TestMapUnmapCallback::map_count, 1);
+  allocator.Deallocate(&stats, chunk);
+  EXPECT_EQ(TestMapUnmapCallback::unmap_count, 1);
+}
+
 template<class Allocator>
 void FailInAssertionOnOOM() {
   Allocator a;
   a.Init();
-  const uptr size = 1 << 20;
+  SizeClassAllocatorLocalCache<Allocator> cache;
+  memset(&cache, 0, sizeof(cache));
+  cache.Init(0);
+  AllocatorStats stats;
+  stats.Init();
   for (int i = 0; i < 1000000; i++) {
-    a.Allocate(size, 1);
+    a.AllocateBatch(&stats, &cache, 52);
   }
 
   a.TestOnlyUnmap();
@@ -198,56 +271,69 @@
 #endif
 
 TEST(SanitizerCommon, LargeMmapAllocator) {
-  LargeMmapAllocator a;
+  LargeMmapAllocator<> a;
   a.Init();
+  AllocatorStats stats;
+  stats.Init();
 
-  static const int kNumAllocs = 100;
-  void *allocated[kNumAllocs];
-  static const uptr size = 1000;
+  static const int kNumAllocs = 1000;
+  char *allocated[kNumAllocs];
+  static const uptr size = 4000;
   // Allocate some.
   for (int i = 0; i < kNumAllocs; i++) {
-    allocated[i] = a.Allocate(size, 1);
+    allocated[i] = (char *)a.Allocate(&stats, size, 1);
+    CHECK(a.PointerIsMine(allocated[i]));
   }
   // Deallocate all.
   CHECK_GT(a.TotalMemoryUsed(), size * kNumAllocs);
   for (int i = 0; i < kNumAllocs; i++) {
-    void *p = allocated[i];
+    char *p = allocated[i];
     CHECK(a.PointerIsMine(p));
-    a.Deallocate(p);
+    a.Deallocate(&stats, p);
   }
   // Check that non left.
   CHECK_EQ(a.TotalMemoryUsed(), 0);
 
   // Allocate some more, also add metadata.
   for (int i = 0; i < kNumAllocs; i++) {
-    void *x = a.Allocate(size, 1);
+    char *x = (char *)a.Allocate(&stats, size, 1);
     CHECK_GE(a.GetActuallyAllocatedSize(x), size);
     uptr *meta = reinterpret_cast<uptr*>(a.GetMetaData(x));
     *meta = i;
     allocated[i] = x;
   }
+  for (int i = 0; i < kNumAllocs * kNumAllocs; i++) {
+    char *p = allocated[i % kNumAllocs];
+    CHECK(a.PointerIsMine(p));
+    CHECK(a.PointerIsMine(p + 2000));
+  }
   CHECK_GT(a.TotalMemoryUsed(), size * kNumAllocs);
   // Deallocate all in reverse order.
   for (int i = 0; i < kNumAllocs; i++) {
     int idx = kNumAllocs - i - 1;
-    void *p = allocated[idx];
+    char *p = allocated[idx];
     uptr *meta = reinterpret_cast<uptr*>(a.GetMetaData(p));
     CHECK_EQ(*meta, idx);
     CHECK(a.PointerIsMine(p));
-    a.Deallocate(p);
+    a.Deallocate(&stats, p);
   }
   CHECK_EQ(a.TotalMemoryUsed(), 0);
+
+  // Test alignments.
   uptr max_alignment = SANITIZER_WORDSIZE == 64 ? (1 << 28) : (1 << 24);
   for (uptr alignment = 8; alignment <= max_alignment; alignment *= 2) {
-    for (int i = 0; i < kNumAllocs; i++) {
+    const uptr kNumAlignedAllocs = 100;
+    for (uptr i = 0; i < kNumAlignedAllocs; i++) {
       uptr size = ((i % 10) + 1) * 4096;
-      allocated[i] = a.Allocate(size, alignment);
+      char *p = allocated[i] = (char *)a.Allocate(&stats, size, alignment);
+      CHECK_EQ(p, a.GetBlockBegin(p));
+      CHECK_EQ(p, a.GetBlockBegin(p + size - 1));
+      CHECK_EQ(p, a.GetBlockBegin(p + size / 2));
       CHECK_EQ(0, (uptr)allocated[i] % alignment);
-      char *p = (char*)allocated[i];
       p[0] = p[size - 1] = 0;
     }
-    for (int i = 0; i < kNumAllocs; i++) {
-      a.Deallocate(allocated[i]);
+    for (uptr i = 0; i < kNumAlignedAllocs; i++) {
+      a.Deallocate(&stats, allocated[i]);
     }
   }
 }
@@ -262,7 +348,8 @@
   a->Init();
 
   AllocatorCache cache;
-  cache.Init();
+  memset(&cache, 0, sizeof(cache));
+  a->InitCache(&cache);
 
   EXPECT_EQ(a->Allocate(&cache, -1, 1), (void*)0);
   EXPECT_EQ(a->Allocate(&cache, -1, 1024), (void*)0);
@@ -298,56 +385,58 @@
     allocated.clear();
     a->SwallowCache(&cache);
   }
+  a->DestroyCache(&cache);
   a->TestOnlyUnmap();
 }
 
 #if SANITIZER_WORDSIZE == 64
 TEST(SanitizerCommon, CombinedAllocator64) {
   TestCombinedAllocator<Allocator64,
-      LargeMmapAllocator,
+      LargeMmapAllocator<>,
       SizeClassAllocatorLocalCache<Allocator64> > ();
 }
 
 TEST(SanitizerCommon, CombinedAllocator64Compact) {
   TestCombinedAllocator<Allocator64Compact,
-      LargeMmapAllocator,
+      LargeMmapAllocator<>,
       SizeClassAllocatorLocalCache<Allocator64Compact> > ();
 }
 #endif
 
 TEST(SanitizerCommon, CombinedAllocator32Compact) {
   TestCombinedAllocator<Allocator32Compact,
-      LargeMmapAllocator,
+      LargeMmapAllocator<>,
       SizeClassAllocatorLocalCache<Allocator32Compact> > ();
 }
 
 template <class AllocatorCache>
 void TestSizeClassAllocatorLocalCache() {
-  static THREADLOCAL AllocatorCache static_allocator_cache;
-  static_allocator_cache.Init();
   AllocatorCache cache;
   typedef typename AllocatorCache::Allocator Allocator;
   Allocator *a = new Allocator();
 
   a->Init();
-  cache.Init();
+  memset(&cache, 0, sizeof(cache));
+  cache.Init(0);
 
   const uptr kNumAllocs = 10000;
   const int kNumIter = 100;
   uptr saved_total = 0;
-  for (int i = 0; i < kNumIter; i++) {
-    void *allocated[kNumAllocs];
-    for (uptr i = 0; i < kNumAllocs; i++) {
-      allocated[i] = cache.Allocate(a, 0);
+  for (int class_id = 1; class_id <= 5; class_id++) {
+    for (int it = 0; it < kNumIter; it++) {
+      void *allocated[kNumAllocs];
+      for (uptr i = 0; i < kNumAllocs; i++) {
+        allocated[i] = cache.Allocate(a, class_id);
+      }
+      for (uptr i = 0; i < kNumAllocs; i++) {
+        cache.Deallocate(a, class_id, allocated[i]);
+      }
+      cache.Drain(a);
+      uptr total_allocated = a->TotalMemoryUsed();
+      if (it)
+        CHECK_EQ(saved_total, total_allocated);
+      saved_total = total_allocated;
     }
-    for (uptr i = 0; i < kNumAllocs; i++) {
-      cache.Deallocate(a, 0, allocated[i]);
-    }
-    cache.Drain(a);
-    uptr total_allocated = a->TotalMemoryUsed();
-    if (saved_total)
-      CHECK_EQ(saved_total, total_allocated);
-    saved_total = total_allocated;
   }
 
   a->TestOnlyUnmap();
@@ -371,6 +460,72 @@
       SizeClassAllocatorLocalCache<Allocator32Compact> >();
 }
 
+#if SANITIZER_WORDSIZE == 64
+typedef SizeClassAllocatorLocalCache<Allocator64> AllocatorCache;
+static AllocatorCache static_allocator_cache;
+
+void *AllocatorLeakTestWorker(void *arg) {
+  typedef AllocatorCache::Allocator Allocator;
+  Allocator *a = (Allocator*)(arg);
+  static_allocator_cache.Allocate(a, 10);
+  static_allocator_cache.Drain(a);
+  return 0;
+}
+
+TEST(SanitizerCommon, AllocatorLeakTest) {
+  typedef AllocatorCache::Allocator Allocator;
+  Allocator a;
+  a.Init();
+  uptr total_used_memory = 0;
+  for (int i = 0; i < 100; i++) {
+    pthread_t t;
+    EXPECT_EQ(0, pthread_create(&t, 0, AllocatorLeakTestWorker, &a));
+    EXPECT_EQ(0, pthread_join(t, 0));
+    if (i == 0)
+      total_used_memory = a.TotalMemoryUsed();
+    EXPECT_EQ(a.TotalMemoryUsed(), total_used_memory);
+  }
+
+  a.TestOnlyUnmap();
+}
+
+// Struct which is allocated to pass info to new threads.  The new thread frees
+// it.
+struct NewThreadParams {
+  AllocatorCache *thread_cache;
+  AllocatorCache::Allocator *allocator;
+  uptr class_id;
+};
+
+// Called in a new thread.  Just frees its argument.
+static void *DeallocNewThreadWorker(void *arg) {
+  NewThreadParams *params = reinterpret_cast<NewThreadParams*>(arg);
+  params->thread_cache->Deallocate(params->allocator, params->class_id, params);
+  return NULL;
+}
+
+// The allocator cache is supposed to be POD and zero initialized.  We should be
+// able to call Deallocate on a zeroed cache, and it will self-initialize.
+TEST(Allocator, AllocatorCacheDeallocNewThread) {
+  AllocatorCache::Allocator allocator;
+  allocator.Init();
+  AllocatorCache main_cache;
+  AllocatorCache child_cache;
+  memset(&main_cache, 0, sizeof(main_cache));
+  memset(&child_cache, 0, sizeof(child_cache));
+  uptr class_id = DefaultSizeClassMap::ClassID(sizeof(NewThreadParams));
+  NewThreadParams *params = reinterpret_cast<NewThreadParams*>(
+      main_cache.Allocate(&allocator, class_id));
+  params->thread_cache = &child_cache;
+  params->allocator = &allocator;
+  params->class_id = class_id;
+  pthread_t t;
+  EXPECT_EQ(0, pthread_create(&t, 0, DeallocNewThreadWorker, params));
+  EXPECT_EQ(0, pthread_join(t, 0));
+  allocator.TestOnlyUnmap();  // Release the mapping like the other tests do.
+}
+#endif
+
 TEST(Allocator, Basic) {
   char *p = (char*)InternalAlloc(10);
   EXPECT_NE(p, (char*)0);
@@ -386,7 +541,7 @@
   char *ptrs[kCount];
   unsigned rnd = 42;
   for (int i = 0; i < kCount; i++) {
-    uptr sz = rand_r(&rnd) % 1000;
+    uptr sz = my_rand_r(&rnd) % 1000;
     char *p = (char*)InternalAlloc(sz);
     EXPECT_NE(p, (char*)0);
     ptrs[i] = p;
@@ -409,3 +564,5 @@
     EXPECT_EQ('c', char_buf[i]);
   }
 }
+
+#endif  // #if TSAN_DEBUG==0
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc b/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
new file mode 100644
index 0000000..f6a944f
--- /dev/null
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
@@ -0,0 +1,162 @@
+//===-- sanitizer_allocator_testlib.cc ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Malloc replacement library based on CombinedAllocator.
+// The primary purpose of this file is an end-to-end integration test
+// for CombinedAllocator.
+//===----------------------------------------------------------------------===//
+/* Usage:
+clang++ -fno-exceptions  -g -fPIC -I. -I../include -Isanitizer \
+ sanitizer_common/tests/sanitizer_allocator_testlib.cc \
+ sanitizer_common/sanitizer_*.cc -shared -lpthread -o testmalloc.so
+LD_PRELOAD=`pwd`/testmalloc.so /your/app
+*/
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+
+#ifndef SANITIZER_MALLOC_HOOK
+# define SANITIZER_MALLOC_HOOK(p, s)
+#endif
+
+#ifndef SANITIZER_FREE_HOOK
+# define SANITIZER_FREE_HOOK(p)
+#endif
+
+namespace {
+static const uptr kAllocatorSpace = 0x600000000000ULL;
+static const uptr kAllocatorSize  =  0x10000000000ULL;  // 1T.
+
+typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 0,
+  CompactSizeClassMap> PrimaryAllocator;
+typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
+typedef LargeMmapAllocator<> SecondaryAllocator;
+typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
+          SecondaryAllocator> Allocator;
+
+static Allocator allocator;
+static bool global_inited;
+static THREADLOCAL AllocatorCache cache;
+static THREADLOCAL bool thread_inited;
+static pthread_key_t pkey;
+
+static void thread_dtor(void *v) {  // Destructor registered for pkey.
+  if ((uptr)v != 3) {  // Value is a pass counter; thread_init() starts it at 1.
+    pthread_setspecific(pkey, (void*)((uptr)v + 1));  // Non-null: run again.
+    return;  // NOTE(review): presumably defers cleanup past other TLS dtors.
+  }
+  allocator.SwallowCache(&cache);  // Third pass: allocator swallows the cache.
+}
+
+static void NOINLINE thread_init() {  // Called when thread_inited is false.
+  if (!global_inited) {  // NOTE(review): plain bool, not synchronized.
+    global_inited = true;
+    allocator.Init();
+    pthread_key_create(&pkey, thread_dtor);
+  }
+  thread_inited = true;  // Set before the calls below; presumably guards re-entry.
+  pthread_setspecific(pkey, (void*)1);  // Starts thread_dtor's pass counter at 1.
+  cache.Init();
+}
+}  // namespace
+
+extern "C" {
+void *malloc(size_t size) {
+  if (UNLIKELY(!thread_inited))
+    thread_init();
+  void *p = allocator.Allocate(&cache, size, 8);
+  SANITIZER_MALLOC_HOOK(p, size);
+  return p;
+}
+
+void free(void *p) {
+  if (UNLIKELY(!thread_inited))
+    thread_init();
+  SANITIZER_FREE_HOOK(p);
+  allocator.Deallocate(&cache, p);
+}
+
+void *calloc(size_t nmemb, size_t size) {  // nmemb * size zeroed bytes.
+  if (UNLIKELY(!thread_inited)) thread_init();
+  if (size != 0 && nmemb > (size_t)-1 / size) return 0;  // Product overflows.
+  size *= nmemb;
+  void *p = allocator.Allocate(&cache, size, 8, false);
+  if (p) memset(p, 0, size);  // Do not write through a failed allocation.
+  SANITIZER_MALLOC_HOOK(p, size);
+  return p;
+}
+
+void *realloc(void *p, size_t size) {
+  if (UNLIKELY(!thread_inited))
+    thread_init();
+  if (p) {
+    SANITIZER_FREE_HOOK(p);
+  }
+  p = allocator.Reallocate(&cache, p, size, 8);
+  if (p) {
+    SANITIZER_MALLOC_HOOK(p, size);
+  }
+  return p;
+}
+
+void *memalign(size_t alignment, size_t size) {
+  if (UNLIKELY(!thread_inited))
+    thread_init();
+  void *p = allocator.Allocate(&cache, size, alignment);
+  SANITIZER_MALLOC_HOOK(p, size);
+  return p;
+}
+
+int posix_memalign(void **memptr, size_t alignment, size_t size) {
+  if (UNLIKELY(!thread_inited))
+    thread_init();
+  *memptr = allocator.Allocate(&cache, size, alignment);  // alignment unchecked
+  SANITIZER_MALLOC_HOOK(*memptr, size);
+  return 0;  // NOTE(review): success is reported even if *memptr is null.
+}
+
+void *valloc(size_t size) {
+  if (UNLIKELY(!thread_inited))
+    thread_init();
+  if (size == 0)
+    size = GetPageSizeCached();
+  void *p = allocator.Allocate(&cache, size, GetPageSizeCached());
+  SANITIZER_MALLOC_HOOK(p, size);
+  return p;
+}
+
+void cfree(void *p) ALIAS("free");
+void *pvalloc(size_t size) ALIAS("valloc");
+void *__libc_memalign(size_t alignment, size_t size) ALIAS("memalign");
+
+void malloc_usable_size() {
+}
+
+void mallinfo() {
+}
+
+void mallopt() {
+}
+}  // extern "C"
+
+namespace std {
+  struct nothrow_t;
+}
+
+void *operator new(size_t size) ALIAS("malloc");
+void *operator new[](size_t size) ALIAS("malloc");
+void *operator new(size_t size, std::nothrow_t const&) ALIAS("malloc");
+void *operator new[](size_t size, std::nothrow_t const&) ALIAS("malloc");
+void operator delete(void *ptr) ALIAS("free");
+void operator delete[](void *ptr) ALIAS("free");
+void operator delete(void *ptr, std::nothrow_t const&) ALIAS("free");
+void operator delete[](void *ptr, std::nothrow_t const&) ALIAS("free");
diff --git a/lib/sanitizer_common/tests/sanitizer_common_test.cc b/lib/sanitizer_common/tests/sanitizer_common_test.cc
index dfb8884..0a777bd 100644
--- a/lib/sanitizer_common/tests/sanitizer_common_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_common_test.cc
@@ -72,7 +72,7 @@
         uptr res = (uptr)MmapAlignedOrDie(
             size * PageSize, alignment * PageSize, "MmapAlignedOrDieTest");
         EXPECT_EQ(0U, res % (alignment * PageSize));
-        memset((void*)res, 1, size * PageSize);
+        internal_memset((void*)res, 1, size * PageSize);
         UnmapOrDie((void*)res, size * PageSize);
       }
     }
@@ -96,4 +96,20 @@
 }
 #endif
 
-}  // namespace sanitizer
+TEST(SanitizerCommon, InternalVector) {
+  InternalVector<uptr> vector(1);
+  for (uptr i = 0; i < 100; i++) {
+    EXPECT_EQ(i, vector.size());
+    vector.push_back(i);
+  }
+  for (uptr i = 0; i < 100; i++) {
+    EXPECT_EQ(i, vector[i]);
+  }
+  for (int i = 99; i >= 0; i--) {
+    EXPECT_EQ((uptr)i, vector.back());
+    vector.pop_back();
+    EXPECT_EQ((uptr)i, vector.size());
+  }
+}
+
+}  // namespace __sanitizer
diff --git a/lib/sanitizer_common/tests/sanitizer_flags_test.cc b/lib/sanitizer_common/tests/sanitizer_flags_test.cc
index 1bb17e9..8c456c6 100644
--- a/lib/sanitizer_common/tests/sanitizer_flags_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_flags_test.cc
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_libc.h"
 #include "gtest/gtest.h"
 
 #include <string.h>
@@ -31,7 +32,7 @@
                         const char *final_value) {
   const char *flag = start_value;
   ParseFlag(env, &flag, kFlagName);
-  EXPECT_STREQ(final_value, flag);
+  EXPECT_EQ(0, internal_strcmp(final_value, flag));
 }
 
 TEST(SanitizerCommon, BooleanFlags) {
@@ -64,4 +65,21 @@
   TestStrFlag("", "--flag_name=\"abc qwe\" asd", "abc qwe");
 }
 
+static void TestTwoFlags(const char *env, bool expected_flag1,
+                         const char *expected_flag2) {
+  bool flag1 = !expected_flag1;
+  const char *flag2 = "";
+  ParseFlag(env, &flag1, "flag1");
+  ParseFlag(env, &flag2, "flag2");
+  EXPECT_EQ(expected_flag1, flag1);
+  EXPECT_EQ(0, internal_strcmp(flag2, expected_flag2));
+}
+
+TEST(SanitizerCommon, MultipleFlags) {
+  TestTwoFlags("flag1=1 flag2='zzz'", true, "zzz");
+  TestTwoFlags("flag2='qxx' flag1=0", false, "qxx");
+  TestTwoFlags("flag1=false:flag2='zzz'", false, "zzz");
+  TestTwoFlags("flag2=qxx:flag1=yes", true, "qxx");
+}
+
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/tests/sanitizer_libc_test.cc b/lib/sanitizer_common/tests/sanitizer_libc_test.cc
index ff38e16..b677130 100644
--- a/lib/sanitizer_common/tests/sanitizer_libc_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_libc_test.cc
@@ -9,9 +9,17 @@
 // Tests for sanitizer_libc.h.
 //===----------------------------------------------------------------------===//
 
+#include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "gtest/gtest.h"
 
+#if defined(__linux__) || defined(__APPLE__)
+# define SANITIZER_TEST_HAS_STAT_H 1
+# include <sys/stat.h>
+#else
+# define SANITIZER_TEST_HAS_STAT_H 0
+#endif
+
 // A regression test for internal_memmove() implementation.
 TEST(SanitizerCommon, InternalMemmoveRegression) {
   char src[] = "Hello World";
@@ -20,3 +28,72 @@
   EXPECT_EQ(dest[0], src[0]);
   EXPECT_EQ(dest[4], src[4]);
 }
+
+TEST(SanitizerCommon, mem_is_zero) {
+  size_t size = 128;
+  char *x = new char[size];
+  memset(x, 0, size);
+  for (size_t pos = 0; pos < size; pos++) {
+    x[pos] = 1;  // Exactly one non-zero byte, at index pos.
+    for (size_t beg = 0; beg < size; beg++) {
+      for (size_t end = beg; end <= size; end++) {  // <= so x[size-1] is covered.
+        // The range [beg, end) holds the non-zero byte iff beg <= pos < end.
+        if (beg <= pos && pos < end)
+          EXPECT_FALSE(__sanitizer::mem_is_zero(x + beg, end - beg));
+        else
+          EXPECT_TRUE(__sanitizer::mem_is_zero(x + beg, end - beg));
+      }
+    }
+    x[pos] = 0;
+  }
+  delete [] x;
+}
+
+TEST(SanitizerCommon, FileOps) {
+  const char *str1 = "qwerty";
+  uptr len1 = internal_strlen(str1);
+  const char *str2 = "zxcv";
+  uptr len2 = internal_strlen(str2);
+
+  u32 uid = GetUid();
+  char temp_filename[128];
+#ifdef __ANDROID__
+  // I don't know a way to query temp directory location on Android without
+  // going through Java interfaces. The code below is not ideal, but should
+  // work. May require "adb root", but it is needed for almost any use of ASan
+  // on Android already.
+  internal_snprintf(temp_filename, sizeof(temp_filename),
+                    "%s/sanitizer_common.tmp.%d",
+                    GetEnv("EXTERNAL_STORAGE"), uid);
+#else
+  internal_snprintf(temp_filename, sizeof(temp_filename),
+                    "/tmp/sanitizer_common.tmp.%d", uid);
+#endif
+  fd_t fd = OpenFile(temp_filename, true);  // Write both strings back to back.
+  EXPECT_NE(fd, kInvalidFd);
+  EXPECT_EQ(len1, internal_write(fd, str1, len1));
+  EXPECT_EQ(len2, internal_write(fd, str2, len2));
+  internal_close(fd);
+
+  fd = OpenFile(temp_filename, false);  // Reopen and verify size and contents.
+  EXPECT_NE(fd, kInvalidFd);
+  uptr fsize = internal_filesize(fd);
+  EXPECT_EQ(len1 + len2, fsize);
+
+#if SANITIZER_TEST_HAS_STAT_H
+  struct stat st1, st2, st3;
+  EXPECT_EQ(0, internal_stat(temp_filename, &st1));
+  EXPECT_EQ(0, internal_lstat(temp_filename, &st2));
+  EXPECT_EQ(0, internal_fstat(fd, &st3));
+  EXPECT_EQ(fsize, (uptr)st3.st_size);
+#endif
+
+  char buf[64] = {};
+  EXPECT_EQ(len1, internal_read(fd, buf, len1));
+  EXPECT_EQ(0, internal_memcmp(buf, str1, len1));
+  EXPECT_EQ((char)0, buf[len1]);  // Nothing written past the len1 bytes read.
+  internal_memset(buf, 0, len1);
+  EXPECT_EQ(len2, internal_read(fd, buf, len2));
+  EXPECT_EQ(0, internal_memcmp(buf, str2, len2));
+  internal_close(fd);
+}
diff --git a/lib/sanitizer_common/tests/sanitizer_linux_test.cc b/lib/sanitizer_common/tests/sanitizer_linux_test.cc
new file mode 100644
index 0000000..e559e90
--- /dev/null
+++ b/lib/sanitizer_common/tests/sanitizer_linux_test.cc
@@ -0,0 +1,114 @@
+//===-- sanitizer_linux_test.cc -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tests for sanitizer_linux.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef __linux__
+
+#include "sanitizer_common/sanitizer_linux.h"
+#include "gtest/gtest.h"
+
+#include "sanitizer_common/sanitizer_common.h"
+
+#include <pthread.h>
+#include <sched.h>
+
+#include <algorithm>
+#include <set>
+
+namespace __sanitizer {
+static pthread_cond_t thread_exit_cond;
+static pthread_mutex_t thread_exit_mutex;
+static pthread_cond_t tid_reported_cond;
+static pthread_mutex_t tid_reported_mutex;
+static bool thread_exit;
+
+void *TIDReporterThread(void *tid_storage) {
+  pthread_mutex_lock(&tid_reported_mutex);
+  *(pid_t *)tid_storage = GetTid();
+  pthread_cond_broadcast(&tid_reported_cond);
+  pthread_mutex_unlock(&tid_reported_mutex);
+
+  pthread_mutex_lock(&thread_exit_mutex);
+  while (!thread_exit)
+    pthread_cond_wait(&thread_exit_cond, &thread_exit_mutex);
+  pthread_mutex_unlock(&thread_exit_mutex);
+  return NULL;
+}
+
+// The set of TIDs produced by ThreadLister should include the TID of every
+// thread we spawn here. The two sets may differ if there are other threads
+// running in the current process that we are not aware of.
+// Calling ThreadLister::Reset() should not change this.
+TEST(SanitizerLinux, ThreadListerMultiThreaded) {
+  pthread_mutex_init(&thread_exit_mutex, NULL);
+  pthread_mutex_init(&tid_reported_mutex, NULL);
+  pthread_cond_init(&thread_exit_cond, NULL);
+  pthread_cond_init(&tid_reported_cond, NULL);
+  const uptr kThreadCount = 20; // does not include the main thread
+  pthread_t thread_ids[kThreadCount];
+  pid_t  thread_tids[kThreadCount];
+  pid_t pid = getpid();
+  pid_t self_tid = GetTid();
+  thread_exit = false;
+  pthread_mutex_lock(&tid_reported_mutex);
+  for (uptr i = 0; i < kThreadCount; i++) {
+    int pthread_create_result;
+    thread_tids[i] = -1;
+    pthread_create_result = pthread_create(&thread_ids[i], NULL,
+                                           TIDReporterThread,
+                                           &thread_tids[i]);
+    ASSERT_EQ(pthread_create_result, 0);
+    while (thread_tids[i] == -1)
+      pthread_cond_wait(&tid_reported_cond, &tid_reported_mutex);
+  }
+  pthread_mutex_unlock(&tid_reported_mutex);
+  std::set<pid_t> reported_tids(thread_tids, thread_tids + kThreadCount);
+  reported_tids.insert(self_tid);
+
+  ThreadLister thread_lister(pid);
+  // There's a Reset() call between the first and second iteration.
+  for (uptr i = 0; i < 2; i++) {
+    std::set<pid_t> listed_tids;
+
+    EXPECT_FALSE(thread_lister.error());
+    for (uptr i = 0; i < kThreadCount + 1; i++) {
+      pid_t tid = thread_lister.GetNextTID();
+      EXPECT_GE(tid, 0);
+      EXPECT_FALSE(thread_lister.error());
+      listed_tids.insert(tid);
+    }
+    pid_t tid = thread_lister.GetNextTID();
+    EXPECT_LT(tid, 0);
+    EXPECT_FALSE(thread_lister.error());
+
+    std::set<pid_t> intersection;
+    std::set_intersection(reported_tids.begin(), reported_tids.end(),
+                          listed_tids.begin(), listed_tids.end(),
+                          std::inserter(intersection, intersection.begin()));
+    EXPECT_EQ(intersection, reported_tids);
+    thread_lister.Reset();
+  }
+
+  pthread_mutex_lock(&thread_exit_mutex);
+  thread_exit = true;
+  pthread_cond_broadcast(&thread_exit_cond);
+  pthread_mutex_unlock(&thread_exit_mutex);
+  for (uptr i = 0; i < kThreadCount; i++)
+    pthread_join(thread_ids[i], NULL);
+  pthread_mutex_destroy(&thread_exit_mutex);
+  pthread_mutex_destroy(&tid_reported_mutex);
+  pthread_cond_destroy(&thread_exit_cond);
+  pthread_cond_destroy(&tid_reported_cond);
+}
+}  // namespace __sanitizer
+
+#endif  // __linux__
diff --git a/lib/sanitizer_common/tests/sanitizer_list_test.cc b/lib/sanitizer_common/tests/sanitizer_list_test.cc
index d328fbf..fbe53c0 100644
--- a/lib/sanitizer_common/tests/sanitizer_list_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_list_test.cc
@@ -21,8 +21,7 @@
 
 typedef IntrusiveList<ListItem> List;
 
-// Check that IntrusiveList can be made thread-local.
-static THREADLOCAL List static_list;
+static List static_list;
 
 static void SetList(List *l, ListItem *x = 0,
                     ListItem *y = 0, ListItem *z = 0) {
@@ -154,4 +153,21 @@
   CHECK(l2.empty());
 }
 
+TEST(SanitizerCommon, IntrusiveListAppendEmpty) {
+  ListItem i;
+  List l;
+  l.clear();
+  l.push_back(&i);
+  List l2;
+  l2.clear();
+  l.append_back(&l2);
+  CHECK_EQ(l.back(), &i);
+  CHECK_EQ(l.front(), &i);
+  CHECK_EQ(l.size(), 1);
+  l.append_front(&l2);
+  CHECK_EQ(l.back(), &i);
+  CHECK_EQ(l.front(), &i);
+  CHECK_EQ(l.size(), 1);
+}
+
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/tests/sanitizer_mutex_test.cc b/lib/sanitizer_common/tests/sanitizer_mutex_test.cc
new file mode 100644
index 0000000..1dc9bef
--- /dev/null
+++ b/lib/sanitizer_common/tests/sanitizer_mutex_test.cc
@@ -0,0 +1,135 @@
+//===-- sanitizer_mutex_test.cc -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "gtest/gtest.h"
+
+#include <string.h>
+
+namespace __sanitizer {
+
+template<typename MutexType>
+class TestData {
+ public:
+  explicit TestData(MutexType *mtx)
+      : mtx_(mtx) {
+    for (int i = 0; i < kSize; i++)
+      data_[i] = 0;
+  }
+
+  void Write() {
+    Lock l(mtx_);
+    T v0 = data_[0];
+    for (int i = 0; i < kSize; i++) {
+      CHECK_EQ(data_[i], v0);
+      data_[i]++;
+    }
+  }
+
+  void TryWrite() {
+    if (!mtx_->TryLock())
+      return;
+    T v0 = data_[0];
+    for (int i = 0; i < kSize; i++) {
+      CHECK_EQ(data_[i], v0);
+      data_[i]++;
+    }
+    mtx_->Unlock();
+  }
+
+  void Backoff() {
+    volatile T data[kSize] = {};
+    for (int i = 0; i < kSize; i++) {
+      data[i]++;
+      CHECK_EQ(data[i], 1);
+    }
+  }
+
+ private:
+  typedef GenericScopedLock<MutexType> Lock;
+  static const int kSize = 64;
+  typedef u64 T;
+  MutexType *mtx_;
+  char pad_[kCacheLineSize];
+  T data_[kSize];
+};
+
+const int kThreads = 8;
+const int kWriteRate = 1024;
+#if SANITIZER_DEBUG
+const int kIters = 16*1024;
+#else
+const int kIters = 64*1024;
+#endif
+
+template<typename MutexType>
+static void *lock_thread(void *param) {
+  TestData<MutexType> *data = (TestData<MutexType>*)param;
+  for (int i = 0; i < kIters; i++) {
+    data->Write();
+    data->Backoff();
+  }
+  return 0;
+}
+
+template<typename MutexType>
+static void *try_thread(void *param) {
+  TestData<MutexType> *data = (TestData<MutexType>*)param;
+  for (int i = 0; i < kIters; i++) {
+    data->TryWrite();
+    data->Backoff();
+  }
+  return 0;
+}
+
+template<typename MutexType>
+static void check_locked(MutexType *mtx) {
+  GenericScopedLock<MutexType> l(mtx);
+  mtx->CheckLocked();
+}
+
+TEST(SanitizerCommon, SpinMutex) {
+  SpinMutex mtx;
+  mtx.Init();
+  TestData<SpinMutex> data(&mtx);
+  pthread_t threads[kThreads];
+  for (int i = 0; i < kThreads; i++)
+    pthread_create(&threads[i], 0, lock_thread<SpinMutex>, &data);
+  for (int i = 0; i < kThreads; i++)
+    pthread_join(threads[i], 0);
+}
+
+TEST(SanitizerCommon, SpinMutexTry) {
+  SpinMutex mtx;
+  mtx.Init();
+  TestData<SpinMutex> data(&mtx);
+  pthread_t threads[kThreads];
+  for (int i = 0; i < kThreads; i++)
+    pthread_create(&threads[i], 0, try_thread<SpinMutex>, &data);
+  for (int i = 0; i < kThreads; i++)
+    pthread_join(threads[i], 0);
+}
+
+TEST(SanitizerCommon, BlockingMutex) {
+  u64 mtxmem[1024] = {};
+  BlockingMutex *mtx = new(mtxmem) BlockingMutex(LINKER_INITIALIZED);
+  TestData<BlockingMutex> data(mtx);
+  pthread_t threads[kThreads];
+  for (int i = 0; i < kThreads; i++)
+    pthread_create(&threads[i], 0, lock_thread<BlockingMutex>, &data);
+  for (int i = 0; i < kThreads; i++)
+    pthread_join(threads[i], 0);
+  check_locked(mtx);
+}
+
+}  // namespace __sanitizer
diff --git a/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc b/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc
new file mode 100644
index 0000000..1df2bcf
--- /dev/null
+++ b/lib/sanitizer_common/tests/sanitizer_scanf_interceptor_test.cc
@@ -0,0 +1,178 @@
+//===-- sanitizer_scanf_interceptor_test.cc -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tests for *scanf interceptors implementation in sanitizer_common.
+//
+//===----------------------------------------------------------------------===//
+#include <vector>
+
+#include "interception/interception.h"
+#include "sanitizer_test_utils.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "gtest/gtest.h"
+
+using namespace __sanitizer;
+
+#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size)                         \
+  ((std::vector<unsigned> *)ctx)->push_back(size)
+
+#include "sanitizer_common/sanitizer_common_interceptors_scanf.inc"
+
+static const char scanf_buf[] = "Test string.";
+static const unsigned scanf_buf_size = sizeof(scanf_buf);  // read via va_arg
+static const unsigned SCANF_ARGS_MAX = 16;
+
+static void testScanf3(void *ctx, int result, bool allowGnuMalloc,
+                       const char *format, ...) {
+  va_list ap;
+  va_start(ap, format);
+  scanf_common(ctx, result, allowGnuMalloc, format, ap);
+  va_end(ap);
+}
+
+static void testScanf2(const char *format, int scanf_result,
+                       bool allowGnuMalloc, unsigned n,
+                       va_list expected_sizes) {
+  std::vector<unsigned> scanf_sizes;  // Filled by COMMON_INTERCEPTOR_WRITE_RANGE.
+  // 16 args should be enough.
+  testScanf3((void *)&scanf_sizes, scanf_result, allowGnuMalloc, format,
+             scanf_buf, scanf_buf, scanf_buf, scanf_buf, scanf_buf, scanf_buf,
+             scanf_buf, scanf_buf, scanf_buf, scanf_buf, scanf_buf, scanf_buf,
+             scanf_buf, scanf_buf, scanf_buf, scanf_buf);
+  ASSERT_EQ(n, scanf_sizes.size()) << "Unexpected number of format arguments: '"
+                                   << format << "'";
+  for (unsigned i = 0; i < n; ++i)  // Compare each recorded size in order.
+    EXPECT_EQ(va_arg(expected_sizes, unsigned), scanf_sizes[i])
+        << "Unexpected write size for argument " << i << ", format string '"
+        << format << "'";
+}
+
+static void testScanf(const char *format, unsigned n, ...) {
+  va_list ap;
+  va_start(ap, n);
+  testScanf2(format, SCANF_ARGS_MAX, /* allowGnuMalloc */ true, n, ap);
+  va_end(ap);
+}
+
+static void testScanfPartial(const char *format, int scanf_result, unsigned n,
+                             ...) {
+  va_list ap;
+  va_start(ap, n);
+  testScanf2(format, scanf_result, /* allowGnuMalloc */ true,  n, ap);
+  va_end(ap);
+}
+
+static void testScanfNoGnuMalloc(const char *format, unsigned n, ...) {
+  va_list ap;
+  va_start(ap, n);
+  testScanf2(format, SCANF_ARGS_MAX, /* allowGnuMalloc */ false, n, ap);
+  va_end(ap);
+}
+
+TEST(SanitizerCommonInterceptors, Scanf) {
+  const unsigned I = sizeof(int);          // NOLINT
+  const unsigned L = sizeof(long);         // NOLINT
+  const unsigned LL = sizeof(long long);   // NOLINT
+  const unsigned S = sizeof(short);        // NOLINT
+  const unsigned C = sizeof(char);         // NOLINT
+  const unsigned D = sizeof(double);       // NOLINT
+  const unsigned LD = sizeof(long double); // NOLINT
+  const unsigned F = sizeof(float);        // NOLINT
+  const unsigned P = sizeof(char *);       // NOLINT
+
+  testScanf("%d", 1, I);
+  testScanf("%d%d%d", 3, I, I, I);
+  testScanf("ab%u%dc", 2, I, I);
+  testScanf("%ld", 1, L);
+  testScanf("%llu", 1, LL);
+  testScanf("a %hd%hhx", 2, S, C);
+  testScanf("%c", 1, C);
+
+  testScanf("%%", 0);
+  testScanf("a%%", 0);
+  testScanf("a%%b", 0);
+  testScanf("a%%%%b", 0);
+  testScanf("a%%b%%", 0);
+  testScanf("a%%%%%%b", 0);
+  testScanf("a%%%%%b", 0);
+  testScanf("a%%%%%f", 1, F);
+  testScanf("a%%%lxb", 1, L);
+  testScanf("a%lf%%%lxb", 2, D, L);
+  testScanf("%nf", 1, I);
+
+  testScanf("%10s", 1, 11);
+  testScanf("%10c", 1, 10);
+  testScanf("%%10s", 0);
+  testScanf("%*10s", 0);
+  testScanf("%*d", 0);
+
+  testScanf("%4d%8f%c", 3, I, F, C);
+  testScanf("%s%d", 2, scanf_buf_size, I);
+  testScanf("%[abc]", 1, scanf_buf_size);
+  testScanf("%4[bcdef]", 1, 5);
+  testScanf("%[]]", 1, scanf_buf_size);
+  testScanf("%8[^]%d0-9-]%c", 2, 9, C);
+
+  testScanf("%*[^:]%n:%d:%1[ ]%n", 4, I, I, 2, I);
+
+  testScanf("%*d%u", 1, I);
+
+  testScanf("%c%d", 2, C, I);
+  testScanf("%A%lf", 2, F, D);
+
+  testScanf("%ms %Lf", 2, P, LD);
+  testScanf("s%Las", 1, LD);
+  testScanf("%ar", 1, F);
+
+  // In the cases with std::min below the format spec can be interpreted as
+  // either floating-something, or (GNU extension) callee-allocated string.
+  // Our conservative implementation reports one of the two possibilities with
+  // the least store range.
+  testScanf("%a[", 0);
+  testScanf("%a[]", 0);
+  testScanf("%a[]]", 1, std::min(F, P));
+  testScanf("%a[abc]", 1, std::min(F, P));
+  testScanf("%a[^abc]", 1, std::min(F, P));
+  testScanf("%a[ab%c] %d", 0);
+  testScanf("%a[^ab%c] %d", 0);
+  testScanf("%as", 1, std::min(F, P));
+  testScanf("%aS", 1, std::min(F, P));
+  testScanf("%a13S", 1, std::min(F, P));
+  testScanf("%alS", 1, std::min(F, P));
+
+  testScanfNoGnuMalloc("s%Las", 1, LD);
+  testScanfNoGnuMalloc("%ar", 1, F);
+  testScanfNoGnuMalloc("%a[", 1, F);
+  testScanfNoGnuMalloc("%a[]", 1, F);
+  testScanfNoGnuMalloc("%a[]]", 1, F);
+  testScanfNoGnuMalloc("%a[abc]", 1, F);
+  testScanfNoGnuMalloc("%a[^abc]", 1, F);
+  testScanfNoGnuMalloc("%a[ab%c] %d", 3, F, C, I);
+  testScanfNoGnuMalloc("%a[^ab%c] %d", 3, F, C, I);
+  testScanfNoGnuMalloc("%as", 1, F);
+  testScanfNoGnuMalloc("%aS", 1, F);
+  testScanfNoGnuMalloc("%a13S", 1, F);
+  testScanfNoGnuMalloc("%alS", 1, F);
+
+  testScanf("%5$d", 0);
+  testScanf("%md", 0);
+  testScanf("%m10s", 0);
+
+  testScanfPartial("%d%d%d%d //1\n", 1, 1, I);
+  testScanfPartial("%d%d%d%d //2\n", 2, 2, I, I);
+  testScanfPartial("%d%d%d%d //3\n", 3, 3, I, I, I);
+  testScanfPartial("%d%d%d%d //4\n", 4, 4, I, I, I, I);
+
+  testScanfPartial("%d%n%n%d //1\n", 1, 1, I);
+  testScanfPartial("%d%n%n%d //2\n", 2, 4, I, I, I, I);
+
+  testScanfPartial("%d%n%n%d %s %s", 3, 5, I, I, I, I, scanf_buf_size);
+  testScanfPartial("%d%n%n%d %s %s", 4, 6, I, I, I, I, scanf_buf_size,
+                   scanf_buf_size);
+}
diff --git a/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cc b/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cc
new file mode 100644
index 0000000..2025255
--- /dev/null
+++ b/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cc
@@ -0,0 +1,78 @@
+//===-- sanitizer_stacktrace_test.cc --------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "gtest/gtest.h"
+
+namespace __sanitizer {
+
+class FastUnwindTest : public ::testing::Test {
+ protected:
+  virtual void SetUp();
+
+  uptr fake_stack[10];
+  uptr start_pc;
+  uptr fake_top;
+  uptr fake_bottom;
+  StackTrace trace;
+};
+
+void FastUnwindTest::SetUp() {
+  // Fill an array of pointers with fake fp+retaddr pairs.  Frame pointers have
+  // even indices.
+  for (uptr i = 0; i+1 < ARRAY_SIZE(fake_stack); i += 2) {
+    fake_stack[i] = (uptr)&fake_stack[i+2];  // fp
+    fake_stack[i+1] = i+1; // retaddr
+  }
+  // Mark the last fp as zero to terminate the stack trace.
+  fake_stack[RoundDownTo(ARRAY_SIZE(fake_stack) - 1, 2)] = 0;
+
+  // Top is two slots past the end because FastUnwindStack subtracts two.
+  fake_top = (uptr)&fake_stack[ARRAY_SIZE(fake_stack) + 2];
+  // Bottom is one slot before the start because FastUnwindStack uses >.
+  fake_bottom = (uptr)&fake_stack[-1];
+  start_pc = 0;
+
+  // This is common setup done by __asan::GetStackTrace().
+  trace.size = 0;
+  trace.max_size = ARRAY_SIZE(fake_stack);
+  trace.trace[0] = start_pc;
+}
+
+TEST_F(FastUnwindTest, Basic) {
+  trace.FastUnwindStack(start_pc, (uptr)&fake_stack[0],
+                        fake_top, fake_bottom);
+  // Should get all on-stack retaddrs and start_pc.
+  EXPECT_EQ(6U, trace.size);
+  EXPECT_EQ(start_pc, trace.trace[0]);
+  for (uptr i = 1; i <= 5; i++) {
+    EXPECT_EQ(i*2 - 1, trace.trace[i]);
+  }
+}
+
+// From: http://code.google.com/p/address-sanitizer/issues/detail?id=162
+TEST_F(FastUnwindTest, FramePointerLoop) {
+  // Make one fp point to itself.
+  fake_stack[4] = (uptr)&fake_stack[4];
+  trace.FastUnwindStack(start_pc, (uptr)&fake_stack[0],
+                        fake_top, fake_bottom);
+  // Should get all on-stack retaddrs up to the 4th slot and start_pc.
+  EXPECT_EQ(4U, trace.size);
+  EXPECT_EQ(start_pc, trace.trace[0]);
+  for (uptr i = 1; i <= 3; i++) {
+    EXPECT_EQ(i*2 - 1, trace.trace[i]);
+  }
+}
+
+}  // namespace __sanitizer
diff --git a/lib/sanitizer_common/tests/sanitizer_test_utils.h b/lib/sanitizer_common/tests/sanitizer_test_utils.h
new file mode 100644
index 0000000..a770d0f
--- /dev/null
+++ b/lib/sanitizer_common/tests/sanitizer_test_utils.h
@@ -0,0 +1,82 @@
+//===-- sanitizer_test_utils.h ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of *Sanitizer runtime.
+// Common unit tests utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_TEST_UTILS_H
+#define SANITIZER_TEST_UTILS_H
+
+#if defined(_WIN32)
+typedef unsigned __int8  uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+typedef __int8           int8_t;
+typedef __int16          int16_t;
+typedef __int32          int32_t;
+typedef __int64          int64_t;
+# define NOINLINE __declspec(noinline)
+# define USED
+#else  // defined(_WIN32)
+# define NOINLINE __attribute__((noinline))
+# define USED __attribute__((used))
+#include <stdint.h>
+#endif  // defined(_WIN32)
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+
+#ifndef ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
+# if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+#  define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
+    __attribute__((no_sanitize_address))
+# else
+#  define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
+# endif
+#endif  // ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
+
+#if __LP64__ || defined(_WIN64)
+#  define SANITIZER_WORDSIZE 64
+#else
+#  define SANITIZER_WORDSIZE 32
+#endif
+
+// Make the compiler think that something is going on there.
+inline void break_optimization(void *arg) {
+  __asm__ __volatile__("" : : "r" (arg) : "memory");  // empty asm, memory clobber
+}
+
+// This function returns its parameter but in such a way that compiler
+// can not prove it.
+template<class T>
+NOINLINE
+static T Ident(T t) {
+  T ret = t;
+  break_optimization(&ret);
+  return ret;
+}
+
+// Simple stand-alone pseudorandom number generator.
+// Current algorithm is ANSI C linear congruential PRNG.
+static inline uint32_t my_rand_r(uint32_t* state) {
+  return (*state = *state * 1103515245 + 12345) >> 16;
+}
+
+static uint32_t global_seed = 0;
+
+static inline uint32_t my_rand() {
+  return my_rand_r(&global_seed);
+}
+
+
+#endif  // SANITIZER_TEST_UTILS_H
diff --git a/lib/sanitizer_common/tests/standalone_malloc_test.cc b/lib/sanitizer_common/tests/standalone_malloc_test.cc
new file mode 100644
index 0000000..9e6f7c9
--- /dev/null
+++ b/lib/sanitizer_common/tests/standalone_malloc_test.cc
@@ -0,0 +1,100 @@
+// Standalone multi-threaded malloc/free stress test: every thread performs a
+// long sequence of allocations and frees from under a deep call stack, and
+// the allocator statistics are printed at the end.
+#include <stdio.h>
+#include <vector>
+#include <pthread.h>
+#include <malloc.h>
+#include <algorithm>
+#include <utility>
+
+using namespace std;
+
+const size_t kNumThreads = 16;
+const size_t kNumIters = 1 << 23;
+
+// Empty asm statement with a "memory" clobber that takes arg as an input, so
+// the compiler cannot assume anything about memory across the call.
+inline void break_optimization(void *arg) {
+  __asm__ __volatile__("" : : "r" (arg) : "memory");
+}
+
+// Thread body. t is the thread index smuggled through the void* argument;
+// only thread 0 prints progress and the final summary. Always returns 0.
+__attribute__((noinline))
+static void *MallocThread(void *t) {
+  size_t total_malloced = 0, total_freed = 0;
+  size_t max_in_use = 0;
+  size_t tid = reinterpret_cast<size_t>(t);
+  // Live blocks together with their sizes.
+  vector<pair<char *, size_t> > allocated;
+  allocated.reserve(kNumIters);
+  for (size_t i = 1; i < kNumIters; i++) {
+    if ((i % (kNumIters / 4)) == 0 && tid == 0)
+      fprintf(stderr, "   T[%zu] iter %zu\n", tid, i);
+    bool allocate = (i % 5) <= 2;  // 60% malloc, 40% free
+    if (i > kNumIters / 4)
+      allocate = i % 2;  // then switch to 50% malloc, 50% free
+    if (allocate) {
+      size_t size = 1 + (i % 200);
+      if ((i % 10001) == 0)
+        size *= 4096;  // occasionally request a much larger block
+      total_malloced += size;
+      char *x = new char[size];
+      x[0] = x[size - 1] = x[size / 2] = 0;  // touch first, last and middle
+      allocated.push_back(make_pair(x, size));
+      max_in_use = max(max_in_use, total_malloced - total_freed);
+    } else {
+      if (allocated.empty()) continue;
+      size_t slot = i % allocated.size();
+      char *p = allocated[slot].first;
+      p[0] = 0;  // emulate last user touch of the block
+      size_t size = allocated[slot].second;
+      total_freed += size;
+      // Swap-with-last removal: the order of live blocks does not matter.
+      swap(allocated[slot], allocated.back());
+      allocated.pop_back();
+      delete [] p;
+    }
+  }
+  if (tid == 0)
+    fprintf(stderr, "   T[%zu] total_malloced: %zuM in use %zuM max %zuM\n",
+            tid, total_malloced >> 20, (total_malloced - total_freed) >> 20,
+            max_in_use >> 20);
+  // Release whatever is still live.
+  for (size_t i = 0; i < allocated.size(); i++)
+    delete [] allocated[i].first;
+  return 0;
+}
+
+// DeepStack<N>::run reaches MallocThread through N nested noinline run() frames.
+template <int depth>
+struct DeepStack {
+  __attribute__((noinline))
+  static void *run(void *t) {
+    break_optimization(0);
+    DeepStack<depth - 1>::run(t);
+    break_optimization(0);
+    return 0;
+  }
+};
+
+// Recursion terminator: runs the actual workload.
+template<>
+struct DeepStack<0> {
+  static void *run(void *t) {
+    MallocThread(t);
+    return 0;
+  }
+};
+
+// Build with -Dstandalone_malloc_test=main to make it a separate program.
+int standalone_malloc_test() {
+  pthread_t t[kNumThreads];
+  for (size_t i = 0; i < kNumThreads; i++)
+    pthread_create(&t[i], 0, DeepStack<200>::run, reinterpret_cast<void *>(i));
+  for (size_t i = 0; i < kNumThreads; i++)
+    pthread_join(t[i], 0);
+  malloc_stats();
+  return 0;
+}
diff --git a/lib/tsan/Makefile.old b/lib/tsan/Makefile.old
index 1273a78..a492eab 100644
--- a/lib/tsan/Makefile.old
+++ b/lib/tsan/Makefile.old
@@ -1,6 +1,6 @@
 DEBUG=0
 LDFLAGS=-ldl -lpthread -pie
-CXXFLAGS = -fPIE -g -Wall -Werror -DTSAN_DEBUG=$(DEBUG)
+CXXFLAGS = -fPIE -g -Wall -Werror -DTSAN_DEBUG=$(DEBUG) -DSANITIZER_DEBUG=$(DEBUG)
 # Silence warnings that Clang produces for gtest code.
 # Use -Wno-attributes so that gcc doesn't complain about unknown warning types.
 CXXFLAGS += -Wno-attributes
@@ -8,14 +8,15 @@
 	CXXFLAGS += -O3
 endif
 ifeq ($(CXX), clang++)
-  CXXFLAGS+= -Wno-unused-private-field -Wno-static-in-inline
+  CXXFLAGS+= -Wno-unused-private-field -Wno-static-in-inline -Wgnu
 endif
 
 LIBTSAN=rtl/libtsan.a
 GTEST_ROOT=third_party/googletest
 GTEST_INCLUDE=-I$(GTEST_ROOT)/include
 GTEST_BUILD_DIR=$(GTEST_ROOT)/build
-GTEST_LIB=$(GTEST_BUILD_DIR)/gtest-all.o
+GTEST_LIB_NAME=gtest-all.o
+GTEST_LIB=$(GTEST_BUILD_DIR)/$(GTEST_LIB_NAME)
 
 SANITIZER_COMMON_TESTS_SRC=$(wildcard ../sanitizer_common/tests/*_test.cc)
 SANITIZER_COMMON_TESTS_OBJ=$(patsubst %.cc,%.o,$(SANITIZER_COMMON_TESTS_SRC))
@@ -35,7 +36,7 @@
 	@ echo "  make install_deps  # Install third-party dependencies required for building"
 	@ echo "  make presubmit     # Run it every time before committing"
 	@ echo
-	@ echo "For more info, see http://code.google.com/p/data-race-test/wiki/ThreadSanitizer2"
+	@ echo "For more info, see http://code.google.com/p/thread-sanitizer/wiki/Development"
 
 $(LIBTSAN): libtsan
 
@@ -56,6 +57,7 @@
 	./lit_tests/test_output.sh
 
 presubmit:
+	../sanitizer_common/scripts/check_lint.sh
 	# Debug build with clang.
 	$(MAKE) -f Makefile.old clean
 	$(MAKE) -f Makefile.old run DEBUG=1 -j 16 CC=clang CXX=clang++
@@ -69,6 +71,10 @@
 	$(MAKE) -f Makefile.old clean
 	$(MAKE) -f Makefile.old run DEBUG=0 -j 16 CC=gcc CXX=g++
 	./check_analyze.sh
+	# Sanity check for Go runtime
+	(cd go && ./buildgo.sh)
+	# Check cmake build
+	./check_cmake.sh
 	@ echo PRESUBMIT PASSED
 
 install_deps:
@@ -81,9 +87,11 @@
 $(GTEST_LIB):
 	mkdir -p $(GTEST_BUILD_DIR) && \
 	cd $(GTEST_BUILD_DIR) && \
-	$(MAKE) -f ../make/Makefile CXXFLAGS="$(CXXFLAGS)" CFLAGS="$(CFLAGS)" CC=$(CC) CXX=$(CXX)
+	$(MAKE) -f ../make/Makefile CXXFLAGS="$(CXXFLAGS)" CFLAGS="$(CFLAGS)" CC=$(CC) CXX=$(CXX) $(GTEST_LIB_NAME)
 
 clean:
 	rm -f asm_*.s libtsan.nm libtsan.objdump */*.o tsan_test
 	rm -rf $(GTEST_BUILD_DIR)
 	$(MAKE) clean -C rtl -f Makefile.old
+	rm -f go/*.s
+	rm -rf build
diff --git a/lib/tsan/check_cmake.sh b/lib/tsan/check_cmake.sh
new file mode 100755
index 0000000..2d84b76
--- /dev/null
+++ b/lib/tsan/check_cmake.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Configures a Release build with clang in <script dir>/build and runs the
+# check-sanitizer, check-tsan and check-asan targets in it.
+set -u
+set -e
+
+# Absolute path of the directory containing this script.
+ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+# Quote every expansion of ROOT so that a checkout path with spaces works.
+mkdir -p "$ROOT/build"
+cd "$ROOT/build"
+# The source root handed to cmake is four directory levels above this script.
+CC=clang CXX=clang++ cmake -DLLVM_ENABLE_WERROR=ON -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON "$ROOT/../../../.."
+make -j64
+make check-sanitizer check-tsan check-asan -j64
+
diff --git a/lib/tsan/go/buildgo.sh b/lib/tsan/go/buildgo.sh
index dc4a238..a153afd 100755
--- a/lib/tsan/go/buildgo.sh
+++ b/lib/tsan/go/buildgo.sh
@@ -1,6 +1,5 @@
 #!/bin/bash
 set -e
-set -x
 
 SRCS="
 	tsan_go.cc
diff --git a/lib/tsan/go/test.c b/lib/tsan/go/test.c
index 2414a1e..902dfc9 100644
--- a/lib/tsan/go/test.c
+++ b/lib/tsan/go/test.c
@@ -13,20 +13,20 @@
 
 #include <stdio.h>
 
-void __tsan_init();
+void __tsan_init(void **thr);
 void __tsan_fini();
 void __tsan_map_shadow(void *addr, unsigned long size);
-void __tsan_go_start(int pgoid, int chgoid, void *pc);
-void __tsan_go_end(int goid);
-void __tsan_read(int goid, void *addr, void *pc);
-void __tsan_write(int goid, void *addr, void *pc);
-void __tsan_func_enter(int goid, void *pc);
-void __tsan_func_exit(int goid);
-void __tsan_malloc(int goid, void *p, unsigned long sz, void *pc);
+void __tsan_go_start(void *thr, void **chthr, void *pc);
+void __tsan_go_end(void *thr);
+void __tsan_read(void *thr, void *addr, void *pc);
+void __tsan_write(void *thr, void *addr, void *pc);
+void __tsan_func_enter(void *thr, void *pc);
+void __tsan_func_exit(void *thr);
+void __tsan_malloc(void *thr, void *p, unsigned long sz, void *pc);
 void __tsan_free(void *p);
-void __tsan_acquire(int goid, void *addr);
-void __tsan_release(int goid, void *addr);
-void __tsan_release_merge(int goid, void *addr);
+void __tsan_acquire(void *thr, void *addr);
+void __tsan_release(void *thr, void *addr);
+void __tsan_release_merge(void *thr, void *addr);
 
 int __tsan_symbolize(void *pc, char **img, char **rtn, char **file, int *l) {
   return 0;
@@ -35,19 +35,21 @@
 char buf[10];
 
 int main(void) {
-  __tsan_init();
+  void *thr0 = 0;
+  __tsan_init(&thr0);
   __tsan_map_shadow(buf, sizeof(buf) + 4096);
-  __tsan_func_enter(0, &main);
-  __tsan_malloc(0, buf, 10, 0);
-  __tsan_release(0, buf);
-  __tsan_release_merge(0, buf);
-  __tsan_go_start(0, 1, 0);
-  __tsan_write(1, buf, 0);
-  __tsan_acquire(1, buf);
-  __tsan_go_end(1);
-  __tsan_read(0, buf, 0);
+  __tsan_func_enter(thr0, &main);
+  __tsan_malloc(thr0, buf, 10, 0);
+  __tsan_release(thr0, buf);
+  __tsan_release_merge(thr0, buf);
+  void *thr1 = 0;
+  __tsan_go_start(thr0, &thr1, 0);
+  __tsan_write(thr1, buf, 0);
+  __tsan_acquire(thr1, buf);
+  __tsan_go_end(thr1);
+  __tsan_read(thr0, buf, 0);
   __tsan_free(buf);
-  __tsan_func_exit(0);
+  __tsan_func_exit(thr0);
   __tsan_fini();
   return 0;
 }
diff --git a/lib/tsan/go/tsan_go.cc b/lib/tsan/go/tsan_go.cc
index cfbe257..b2aa622 100644
--- a/lib/tsan/go/tsan_go.cc
+++ b/lib/tsan/go/tsan_go.cc
@@ -18,10 +18,6 @@
 
 namespace __tsan {
 
-const int kMaxGoroutinesEver = 128*1024;
-
-static ThreadState *goroutines[kMaxGoroutinesEver];
-
 void InitializeInterceptors() {
 }
 
@@ -35,7 +31,7 @@
 void internal_start_thread(void(*func)(void*), void *arg) {
 }
 
-ReportStack *SymbolizeData(uptr addr) {
+ReportLocation *SymbolizeData(uptr addr) {
   return 0;
 }
 
@@ -80,20 +76,18 @@
 
 extern "C" {
 
-static void AllocGoroutine(int tid) {
-  if (tid >= kMaxGoroutinesEver) {
-    Printf("FATAL: Reached goroutine limit\n");
-    Die();
-  }
+static ThreadState *main_thr;
+
+static ThreadState *AllocGoroutine() {
   ThreadState *thr = (ThreadState*)internal_alloc(MBlockThreadContex,
       sizeof(ThreadState));
   internal_memset(thr, 0, sizeof(*thr));
-  goroutines[tid] = thr;
+  return thr;
 }
 
-void __tsan_init() {
-  AllocGoroutine(0);
-  ThreadState *thr = goroutines[0];
+void __tsan_init(ThreadState **thrp) {
+  ThreadState *thr = AllocGoroutine();
+  main_thr = *thrp = thr;
   thr->in_rtl++;
   Initialize(thr);
   thr->in_rtl--;
@@ -101,7 +95,7 @@
 
 void __tsan_fini() {
   // FIXME: Not necessary thread 0.
-  ThreadState *thr = goroutines[0];
+  ThreadState *thr = main_thr;
   thr->in_rtl++;
   int res = Finalize(thr);
   thr->in_rtl--;
@@ -112,44 +106,37 @@
   MapShadow(addr, size);
 }
 
-void __tsan_read(int goid, void *addr, void *pc) {
-  ThreadState *thr = goroutines[goid];
-  MemoryAccess(thr, (uptr)pc, (uptr)addr, 0, false);
+void __tsan_read(ThreadState *thr, void *addr, void *pc) {
+  MemoryRead(thr, (uptr)pc, (uptr)addr, kSizeLog1);
 }
 
-void __tsan_write(int goid, void *addr, void *pc) {
-  ThreadState *thr = goroutines[goid];
-  MemoryAccess(thr, (uptr)pc, (uptr)addr, 0, true);
+void __tsan_write(ThreadState *thr, void *addr, void *pc) {
+  MemoryWrite(thr, (uptr)pc, (uptr)addr, kSizeLog1);
 }
 
-void __tsan_read_range(int goid, void *addr, uptr size, uptr step, void *pc) {
-  ThreadState *thr = goroutines[goid];
-  for (uptr i = 0; i < size; i += step)
-	  MemoryAccess(thr, (uptr)pc, (uptr)addr + i, 0, false);
+void __tsan_read_range(ThreadState *thr, void *addr, uptr size, uptr step,
+                       void *pc) {
+  MemoryAccessRangeStep(thr, (uptr)pc, (uptr)addr, size, step, false);
 }
 
-void __tsan_write_range(int goid, void *addr, uptr size, uptr step, void *pc) {
-  ThreadState *thr = goroutines[goid];
-  for (uptr i = 0; i < size; i += step)
-	  MemoryAccess(thr, (uptr)pc, (uptr)addr + i, 0, true);
+void __tsan_write_range(ThreadState *thr, void *addr, uptr size, uptr step,
+                        void *pc) {
+  MemoryAccessRangeStep(thr, (uptr)pc, (uptr)addr, size, step, true);
 }
 
-void __tsan_func_enter(int goid, void *pc) {
-  ThreadState *thr = goroutines[goid];
+void __tsan_func_enter(ThreadState *thr, void *pc) {
   FuncEntry(thr, (uptr)pc);
 }
 
-void __tsan_func_exit(int goid) {
-  ThreadState *thr = goroutines[goid];
+void __tsan_func_exit(ThreadState *thr) {
   FuncExit(thr);
 }
 
-void __tsan_malloc(int goid, void *p, uptr sz, void *pc) {
-  ThreadState *thr = goroutines[goid];
+void __tsan_malloc(ThreadState *thr, void *p, uptr sz, void *pc) {
   if (thr == 0)  // probably before __tsan_init()
     return;
   thr->in_rtl++;
-  MemoryRangeImitateWrite(thr, (uptr)pc, (uptr)p, sz);
+  MemoryResetRange(thr, (uptr)pc, (uptr)p, sz);
   thr->in_rtl--;
 }
 
@@ -157,52 +144,43 @@
   (void)p;
 }
 
-void __tsan_go_start(int pgoid, int chgoid, void *pc) {
-  if (chgoid == 0)
-    return;
-  AllocGoroutine(chgoid);
-  ThreadState *thr = goroutines[chgoid];
-  ThreadState *parent = goroutines[pgoid];
+void __tsan_go_start(ThreadState *parent, ThreadState **pthr, void *pc) {
+  ThreadState *thr = AllocGoroutine();
+  *pthr = thr;
   thr->in_rtl++;
   parent->in_rtl++;
-  int goid2 = ThreadCreate(parent, (uptr)pc, 0, true);
-  ThreadStart(thr, goid2, 0);
+  int goid = ThreadCreate(parent, (uptr)pc, 0, true);
+  ThreadStart(thr, goid, 0);
   parent->in_rtl--;
   thr->in_rtl--;
 }
 
-void __tsan_go_end(int goid) {
-  ThreadState *thr = goroutines[goid];
+void __tsan_go_end(ThreadState *thr) {
   thr->in_rtl++;
   ThreadFinish(thr);
   thr->in_rtl--;
   internal_free(thr);
-  goroutines[goid] = 0;
 }
 
-void __tsan_acquire(int goid, void *addr) {
-  ThreadState *thr = goroutines[goid];
+void __tsan_acquire(ThreadState *thr, void *addr) {
   thr->in_rtl++;
   Acquire(thr, 0, (uptr)addr);
   thr->in_rtl--;
 }
 
-void __tsan_release(int goid, void *addr) {
-  ThreadState *thr = goroutines[goid];
+void __tsan_release(ThreadState *thr, void *addr) {
   thr->in_rtl++;
   ReleaseStore(thr, 0, (uptr)addr);
   thr->in_rtl--;
 }
 
-void __tsan_release_merge(int goid, void *addr) {
-  ThreadState *thr = goroutines[goid];
+void __tsan_release_merge(ThreadState *thr, void *addr) {
   thr->in_rtl++;
   Release(thr, 0, (uptr)addr);
   thr->in_rtl--;
 }
 
-void __tsan_finalizer_goroutine(int goid) {
-  ThreadState *thr = goroutines[goid];
+void __tsan_finalizer_goroutine(ThreadState *thr) {
   AcquireGlobal(thr, 0);
 }
 
diff --git a/lib/tsan/lit_tests/CMakeLists.txt b/lib/tsan/lit_tests/CMakeLists.txt
index 6dc90e2..ff2508d 100644
--- a/lib/tsan/lit_tests/CMakeLists.txt
+++ b/lib/tsan/lit_tests/CMakeLists.txt
@@ -8,9 +8,8 @@
   ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
   )
 
-if("${CMAKE_HOST_SYSTEM}" STREQUAL "${CMAKE_SYSTEM}")
-  # Run TSan output tests only if we're not cross-compiling,
-  # and can be sure that clang would produce working binaries.
+if(COMPILER_RT_CAN_EXECUTE_TESTS)
+  # Run TSan output tests only if we're sure we can produce working binaries.
   set(TSAN_TEST_DEPS
     clang clang-headers FileCheck count not llvm-symbolizer
     ${TSAN_RUNTIME_LIBRARIES}
@@ -28,7 +27,8 @@
     )
   set_target_properties(check-tsan PROPERTIES FOLDER "TSan unittests")
 elseif(LLVM_INCLUDE_TESTS)
-  # Otherwise run only TSan unit tests.
+  # Otherwise run only TSan unit tests (they are linked using the
+  # host compiler).
   add_lit_testsuite(check-tsan "Running ThreadSanitizer tests"
     ${CMAKE_CURRENT_BINARY_DIR}/Unit
     DEPENDS TsanUnitTests llvm-symbolizer)
diff --git a/lib/tsan/lit_tests/Helpers/blacklist.txt b/lib/tsan/lit_tests/Helpers/blacklist.txt
new file mode 100644
index 0000000..22225e5
--- /dev/null
+++ b/lib/tsan/lit_tests/Helpers/blacklist.txt
@@ -0,0 +1 @@
+fun:*Blacklisted_Thread2*
diff --git a/lib/tsan/lit_tests/Helpers/lit.local.cfg b/lib/tsan/lit_tests/Helpers/lit.local.cfg
new file mode 100644
index 0000000..9246b10
--- /dev/null
+++ b/lib/tsan/lit_tests/Helpers/lit.local.cfg
@@ -0,0 +1,2 @@
+# Files in this directory are helper files for other output tests.
+config.suffixes = []
diff --git a/lib/tsan/lit_tests/Unit/lit.site.cfg.in b/lib/tsan/lit_tests/Unit/lit.site.cfg.in
index 23654b9..420cdca 100644
--- a/lib/tsan/lit_tests/Unit/lit.site.cfg.in
+++ b/lib/tsan/lit_tests/Unit/lit.site.cfg.in
@@ -1,15 +1,16 @@
 ## Autogenerated by LLVM/Clang configuration.
 # Do not edit!
 
-config.build_type = "@CMAKE_BUILD_TYPE@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
+config.llvm_build_mode = "@LLVM_BUILD_MODE@"
 
 # LLVM tools dir can be passed in lit parameters, so try to
 # apply substitution.
 try:
   config.llvm_tools_dir = config.llvm_tools_dir % lit.params
+  config.llvm_build_mode = config.llvm_build_mode % lit.params
 except KeyError,e:
   key, = e.args
   lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key, key))
diff --git a/lib/tsan/lit_tests/atomic_free.cc b/lib/tsan/lit_tests/atomic_free.cc
new file mode 100644
index 0000000..ba9bd5a
--- /dev/null
+++ b/lib/tsan/lit_tests/atomic_free.cc
@@ -0,0 +1,19 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <unistd.h>
+
+void *Thread(void *a) {
+  __atomic_fetch_add((int*)a, 1, __ATOMIC_SEQ_CST);
+  return 0;
+}
+
+int main() {
+  int *a = new int(0);
+  pthread_t t;
+  pthread_create(&t, 0, Thread, a);
+  sleep(1);
+  delete a;
+  pthread_join(t, 0);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/atomic_free2.cc b/lib/tsan/lit_tests/atomic_free2.cc
new file mode 100644
index 0000000..5517bf7
--- /dev/null
+++ b/lib/tsan/lit_tests/atomic_free2.cc
@@ -0,0 +1,19 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <unistd.h>
+
+void *Thread(void *a) {
+  sleep(1);
+  __atomic_fetch_add((int*)a, 1, __ATOMIC_SEQ_CST);
+  return 0;
+}
+
+int main() {
+  int *a = new int(0);
+  pthread_t t;
+  pthread_create(&t, 0, Thread, a);
+  delete a;
+  pthread_join(t, 0);
+}
+
+// CHECK: WARNING: ThreadSanitizer: heap-use-after-free
diff --git a/lib/tsan/lit_tests/atomic_norace.cc b/lib/tsan/lit_tests/atomic_norace.cc
new file mode 100644
index 0000000..265459b
--- /dev/null
+++ b/lib/tsan/lit_tests/atomic_norace.cc
@@ -0,0 +1,61 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+const int kTestCount = 4;
+typedef long long T;
+T atomics[kTestCount * 2];
+
+void Test(int test, T *p, bool main_thread) {
+  volatile T sink;
+  if (test == 0) {
+    if (main_thread)
+      __atomic_fetch_add(p, 1, __ATOMIC_RELAXED);
+    else
+      __atomic_fetch_add(p, 1, __ATOMIC_RELAXED);
+  } else if (test == 1) {
+    if (main_thread)
+      __atomic_exchange_n(p, 1, __ATOMIC_ACQ_REL);
+    else
+      __atomic_exchange_n(p, 1, __ATOMIC_ACQ_REL);
+  } else if (test == 2) {
+    if (main_thread)
+      sink = __atomic_load_n(p, __ATOMIC_SEQ_CST);
+    else
+      __atomic_store_n(p, 1, __ATOMIC_SEQ_CST);
+  } else if (test == 3) {
+    if (main_thread)
+      sink = __atomic_load_n(p, __ATOMIC_SEQ_CST);
+    else
+      sink = *p;
+  }
+}
+
+void *Thread(void *p) {
+  for (int i = 0; i < kTestCount; i++) {
+    Test(i, &atomics[i], false);
+  }
+  sleep(2);
+  for (int i = 0; i < kTestCount; i++) {
+    fprintf(stderr, "Test %d reverse\n", i);
+    Test(i, &atomics[kTestCount + i], false);
+  }
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  sleep(1);
+  for (int i = 0; i < kTestCount; i++) {
+    fprintf(stderr, "Test %d\n", i);
+    Test(i, &atomics[i], true);
+  }
+  for (int i = 0; i < kTestCount; i++) {
+    Test(i, &atomics[kTestCount + i], true);
+  }
+  pthread_join(t, 0);
+}
+
+// CHECK-NOT: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/atomic_race.cc b/lib/tsan/lit_tests/atomic_race.cc
new file mode 100644
index 0000000..360b812
--- /dev/null
+++ b/lib/tsan/lit_tests/atomic_race.cc
@@ -0,0 +1,80 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <unistd.h>
+#include <stdio.h>
+
+const int kTestCount = 4;
+typedef long long T;
+T atomics[kTestCount * 2];
+
+void Test(int test, T *p, bool main_thread) {
+  volatile T sink;
+  if (test == 0) {
+    if (main_thread)
+      __atomic_fetch_add(p, 1, __ATOMIC_RELAXED);
+    else
+      *p = 42;
+  } else if (test == 1) {
+    if (main_thread)
+      __atomic_fetch_add(p, 1, __ATOMIC_RELAXED);
+    else
+      sink = *p;
+  } else if (test == 2) {
+    if (main_thread)
+      sink = __atomic_load_n(p, __ATOMIC_SEQ_CST);
+    else
+      *p = 42;
+  } else if (test == 3) {
+    if (main_thread)
+      __atomic_store_n(p, 1, __ATOMIC_SEQ_CST);
+    else
+      sink = *p;
+  }
+}
+
+void *Thread(void *p) {
+  for (int i = 0; i < kTestCount; i++) {
+    Test(i, &atomics[i], false);
+  }
+  sleep(2);
+  for (int i = 0; i < kTestCount; i++) {
+    fprintf(stderr, "Test %d reverse\n", i);
+    Test(i, &atomics[kTestCount + i], false);
+  }
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  sleep(1);
+  for (int i = 0; i < kTestCount; i++) {
+    fprintf(stderr, "Test %d\n", i);
+    Test(i, &atomics[i], true);
+  }
+  for (int i = 0; i < kTestCount; i++) {
+    Test(i, &atomics[kTestCount + i], true);
+  }
+  pthread_join(t, 0);
+}
+
+// CHECK: Test 0
+// CHECK: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY{{.*}}tsan_interface_atomic
+// CHECK: Test 1
+// CHECK: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY{{.*}}tsan_interface_atomic
+// CHECK: Test 2
+// CHECK: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY{{.*}}tsan_interface_atomic
+// CHECK: Test 3
+// CHECK: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY{{.*}}tsan_interface_atomic
+// CHECK: Test 0 reverse
+// CHECK: ThreadSanitizer: data race
+// CHECK: Test 1 reverse
+// CHECK: ThreadSanitizer: data race
+// CHECK: Test 2 reverse
+// CHECK: ThreadSanitizer: data race
+// CHECK: Test 3 reverse
+// CHECK: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/atomic_stack.cc b/lib/tsan/lit_tests/atomic_stack.cc
new file mode 100644
index 0000000..50f6a8a
--- /dev/null
+++ b/lib/tsan/lit_tests/atomic_stack.cc
@@ -0,0 +1,29 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <unistd.h>
+
+int Global;
+
+void *Thread1(void *x) {
+  sleep(1);
+  __atomic_fetch_add(&Global, 1, __ATOMIC_RELAXED);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  Global++;
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Atomic write of size 4
+// CHECK:     #0 __tsan_atomic32_fetch_add
+// CHECK:     #1 Thread1
diff --git a/lib/tsan/lit_tests/blacklist.cc b/lib/tsan/lit_tests/blacklist.cc
new file mode 100644
index 0000000..5baf926
--- /dev/null
+++ b/lib/tsan/lit_tests/blacklist.cc
@@ -0,0 +1,31 @@
+// Test blacklist functionality for TSan.
+
+// RUN: %clangxx_tsan -O1 %s \
+// RUN:   -fsanitize-blacklist=%p/Helpers/blacklist.txt \
+// RUN:   -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+
+int Global;
+
+void *Thread1(void *x) {
+  Global++;
+  return NULL;
+}
+
+void *Blacklisted_Thread2(void *x) {
+  Global--;
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Blacklisted_Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  printf("PASS\n");
+  return 0;
+}
+
+// CHECK-NOT: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/fd_close_norace.cc b/lib/tsan/lit_tests/fd_close_norace.cc
index c000de4..a8b1a6d 100644
--- a/lib/tsan/lit_tests/fd_close_norace.cc
+++ b/lib/tsan/lit_tests/fd_close_norace.cc
@@ -25,6 +25,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
+  printf("OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/fd_dup_norace.cc b/lib/tsan/lit_tests/fd_dup_norace.cc
new file mode 100644
index 0000000..8826f90
--- /dev/null
+++ b/lib/tsan/lit_tests/fd_dup_norace.cc
@@ -0,0 +1,34 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int fds[2];
+
+void *Thread1(void *x) {
+  char buf;
+  read(fds[0], &buf, 1);
+  close(fds[0]);
+  return 0;
+}
+
+void *Thread2(void *x) {
+  close(fds[1]);
+  return 0;
+}
+
+int main() {
+  fds[0] = open("/dev/random", O_RDONLY);
+  fds[1] = dup2(fds[0], 100);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  printf("OK\n");
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/fd_location.cc b/lib/tsan/lit_tests/fd_location.cc
new file mode 100644
index 0000000..35f9aab
--- /dev/null
+++ b/lib/tsan/lit_tests/fd_location.cc
@@ -0,0 +1,33 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int fds[2];
+
+void *Thread1(void *x) {
+  write(fds[1], "a", 1);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  sleep(1);
+  close(fds[0]);
+  close(fds[1]);
+  return NULL;
+}
+
+int main() {
+  pipe(fds);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Location is file descriptor {{[0-9]+}} created by main thread at:
+// CHECK:     #0 pipe
+// CHECK:     #1 main
+
diff --git a/lib/tsan/lit_tests/fd_pipe_norace.cc b/lib/tsan/lit_tests/fd_pipe_norace.cc
index 1be1a34..2da69ea 100644
--- a/lib/tsan/lit_tests/fd_pipe_norace.cc
+++ b/lib/tsan/lit_tests/fd_pipe_norace.cc
@@ -27,6 +27,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
+  printf("OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/fd_socket_connect_norace.cc b/lib/tsan/lit_tests/fd_socket_connect_norace.cc
new file mode 100644
index 0000000..065299a
--- /dev/null
+++ b/lib/tsan/lit_tests/fd_socket_connect_norace.cc
@@ -0,0 +1,45 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+struct sockaddr_in addr;
+int X;
+
+void *ClientThread(void *x) {
+  int c = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+  X = 42;
+  if (connect(c, (struct sockaddr*)&addr, sizeof(addr))) {
+    perror("connect");
+    exit(1);
+  }
+  close(c);
+  return NULL;
+}
+
+int main() {
+  int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+  addr.sin_family = AF_INET;
+  inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);
+  addr.sin_port = INADDR_ANY;
+  socklen_t len = sizeof(addr);
+  bind(s, (sockaddr*)&addr, len);
+  getsockname(s, (sockaddr*)&addr, &len);
+  listen(s, 10);
+  pthread_t t;
+  pthread_create(&t, 0, ClientThread, 0);
+  int c = accept(s, 0, 0);
+  X = 42;
+  pthread_join(t, 0);
+  close(c);
+  close(s);
+  printf("OK\n");
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+
diff --git a/lib/tsan/lit_tests/fd_socket_norace.cc b/lib/tsan/lit_tests/fd_socket_norace.cc
index 3a128f8..243fc9d 100644
--- a/lib/tsan/lit_tests/fd_socket_norace.cc
+++ b/lib/tsan/lit_tests/fd_socket_norace.cc
@@ -45,6 +45,7 @@
   close(c);
   close(s);
   pthread_join(t, 0);
+  printf("OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/fd_socketpair_norace.cc b/lib/tsan/lit_tests/fd_socketpair_norace.cc
new file mode 100644
index 0000000..f91e4ec
--- /dev/null
+++ b/lib/tsan/lit_tests/fd_socketpair_norace.cc
@@ -0,0 +1,37 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+int fds[2];
+int X;
+
+void *Thread1(void *x) {
+  X = 42;
+  write(fds[1], "a", 1);
+  close(fds[1]);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  char buf;
+  while (read(fds[0], &buf, 1) != 1) {
+  }
+  X = 43;
+  close(fds[0]);
+  return NULL;
+}
+
+int main() {
+  socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  printf("OK\n");
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/free_race.c b/lib/tsan/lit_tests/free_race.c
index 7a2ec0c..ff71a4d 100644
--- a/lib/tsan/lit_tests/free_race.c
+++ b/lib/tsan/lit_tests/free_race.c
@@ -40,4 +40,5 @@
 // CHECK:     #1 main
 // CHECK:   Previous write of size 8 at {{.*}} by thread T1{{.*}}:
 // CHECK:     #0 free
-// CHECK:     #1 Thread1
+// CHECK:     #{{(1|2)}} Thread1
+// CHECK: SUMMARY: ThreadSanitizer: heap-use-after-free{{.*}}Thread2
diff --git a/lib/tsan/lit_tests/free_race2.c b/lib/tsan/lit_tests/free_race2.c
index 095f82e..f20774b 100644
--- a/lib/tsan/lit_tests/free_race2.c
+++ b/lib/tsan/lit_tests/free_race2.c
@@ -22,5 +22,5 @@
 // CHECK:     #1 main
 // CHECK:   Previous write of size 8 at {{.*}} by main thread:
 // CHECK:     #0 free
-// CHECK:     #1 foo
-// CHECK:     #2 main
+// CHECK:     #{{1|2}} foo
+// CHECK:     #{{2|3}} main
diff --git a/lib/tsan/lit_tests/global_race.cc b/lib/tsan/lit_tests/global_race.cc
new file mode 100644
index 0000000..0892d07
--- /dev/null
+++ b/lib/tsan/lit_tests/global_race.cc
@@ -0,0 +1,25 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <stddef.h>
+
+int GlobalData[10];
+
+void *Thread(void *a) {
+  GlobalData[2] = 42;
+  return 0;
+}
+
+int main() {
+  fprintf(stderr, "addr=%p\n", GlobalData);
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  GlobalData[2] = 43;
+  pthread_join(t, 0);
+}
+
+// CHECK: addr=[[ADDR:0x[0-9,a-f]+]]
+// CHECK: WARNING: ThreadSanitizer: data race
+// Requires llvm-symbolizer, so disabled for now.
+// CHECK0: Location is global 'GlobalData' of size 40 at [[ADDR]]
+// CHECK0:                            (global_race.cc.exe+0x[0-9,a-f]+)
diff --git a/lib/tsan/lit_tests/ignore_race.cc b/lib/tsan/lit_tests/ignore_race.cc
index 7a60ca1..23d74d0 100644
--- a/lib/tsan/lit_tests/ignore_race.cc
+++ b/lib/tsan/lit_tests/ignore_race.cc
@@ -25,7 +25,7 @@
   sleep(1);
   Global = 43;
   pthread_join(t, 0);
-  return 0;
+  printf("OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/java.h b/lib/tsan/lit_tests/java.h
new file mode 100644
index 0000000..7d61f58
--- /dev/null
+++ b/lib/tsan/lit_tests/java.h
@@ -0,0 +1,17 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+extern "C" {
+typedef unsigned long jptr;  // NOLINT
+void __tsan_java_init(jptr heap_begin, jptr heap_size);
+int  __tsan_java_fini();
+void __tsan_java_alloc(jptr ptr, jptr size);
+void __tsan_java_free(jptr ptr, jptr size);
+void __tsan_java_move(jptr src, jptr dst, jptr size);
+void __tsan_java_mutex_lock(jptr addr);
+void __tsan_java_mutex_unlock(jptr addr);
+void __tsan_java_mutex_read_lock(jptr addr);
+void __tsan_java_mutex_read_unlock(jptr addr);
+}
diff --git a/lib/tsan/lit_tests/java_alloc.cc b/lib/tsan/lit_tests/java_alloc.cc
new file mode 100644
index 0000000..4dbce70
--- /dev/null
+++ b/lib/tsan/lit_tests/java_alloc.cc
@@ -0,0 +1,32 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include "java.h"
+
+int const kHeapSize = 1024 * 1024;
+
+void stress(jptr addr) {
+  for (jptr sz = 8; sz <= 32; sz <<= 1) {
+    for (jptr i = 0; i < kHeapSize / 4 / sz; i++) {
+      __tsan_java_alloc(addr + i * sz, sz);
+    }
+    __tsan_java_move(addr, addr + kHeapSize / 2, kHeapSize / 4);
+    __tsan_java_free(addr + kHeapSize / 2, kHeapSize / 4);
+  }
+}
+
+void *Thread(void *p) {
+  stress((jptr)p);
+  return 0;
+}
+
+int main() {
+  jptr jheap = (jptr)malloc(kHeapSize);
+  __tsan_java_init(jheap, kHeapSize);
+  pthread_t th;
+  pthread_create(&th, 0, Thread, (void*)(jheap + kHeapSize / 4));
+  stress(jheap);
+  pthread_join(th, 0);
+  printf("OK\n");
+  return __tsan_java_fini();
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/java_lock.cc b/lib/tsan/lit_tests/java_lock.cc
new file mode 100644
index 0000000..f66f1e7
--- /dev/null
+++ b/lib/tsan/lit_tests/java_lock.cc
@@ -0,0 +1,33 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include "java.h"
+
+jptr varaddr;
+jptr lockaddr;
+
+void *Thread(void *p) {
+  __tsan_java_mutex_lock(lockaddr);
+  *(int*)varaddr = 42;
+  __tsan_java_mutex_unlock(lockaddr);
+  return 0;
+}
+
+int main() {
+  int const kHeapSize = 1024 * 1024;
+  void *jheap = malloc(kHeapSize);
+  __tsan_java_init((jptr)jheap, kHeapSize);
+  const int kBlockSize = 16;
+  __tsan_java_alloc((jptr)jheap, kBlockSize);
+  varaddr = (jptr)jheap;
+  lockaddr = (jptr)jheap + 8;
+  pthread_t th;
+  pthread_create(&th, 0, Thread, 0);
+  __tsan_java_mutex_lock(lockaddr);
+  *(int*)varaddr = 43;
+  __tsan_java_mutex_unlock(lockaddr);
+  pthread_join(th, 0);
+  __tsan_java_free((jptr)jheap, kBlockSize);
+  printf("OK\n");
+  return __tsan_java_fini();
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/java_lock_move.cc b/lib/tsan/lit_tests/java_lock_move.cc
new file mode 100644
index 0000000..48b5a5a
--- /dev/null
+++ b/lib/tsan/lit_tests/java_lock_move.cc
@@ -0,0 +1,40 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include "java.h"
+
+jptr varaddr;
+jptr lockaddr;
+jptr varaddr2;
+jptr lockaddr2;
+
+void *Thread(void *p) {
+  sleep(1);  // NOTE(review): presumably lets main() finish __tsan_java_move first - timing-based, confirm.
+  __tsan_java_mutex_lock(lockaddr2);  // lockaddr2/varaddr2 are lockaddr/varaddr offset by kMove (set in main).
+  *(int*)varaddr2 = 42;
+  __tsan_java_mutex_unlock(lockaddr2);
+  return 0;
+}
+
+int main() {
+  int const kHeapSize = 1024 * 1024;
+  void *jheap = malloc(kHeapSize);
+  __tsan_java_init((jptr)jheap, kHeapSize);
+  const int kBlockSize = 64;
+  int const kMove = 1024;
+  __tsan_java_alloc((jptr)jheap, kBlockSize);
+  varaddr = (jptr)jheap;
+  lockaddr = (jptr)jheap + 46;
+  varaddr2 = varaddr + kMove;
+  lockaddr2 = lockaddr + kMove;
+  pthread_t th;
+  pthread_create(&th, 0, Thread, 0);
+  __tsan_java_mutex_lock(lockaddr);
+  *(int*)varaddr = 43;
+  __tsan_java_mutex_unlock(lockaddr);
+  __tsan_java_move(varaddr, varaddr2, kBlockSize);
+  pthread_join(th, 0);
+  __tsan_java_free(varaddr2, kBlockSize);
+  printf("OK\n");
+  return __tsan_java_fini();
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/java_race.cc b/lib/tsan/lit_tests/java_race.cc
new file mode 100644
index 0000000..722bb6e
--- /dev/null
+++ b/lib/tsan/lit_tests/java_race.cc
@@ -0,0 +1,23 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include "java.h"
+
+void *Thread(void *p) {
+  *(int*)p = 42;
+  return 0;
+}
+
+int main() {
+  int const kHeapSize = 1024 * 1024;
+  void *jheap = malloc(kHeapSize);
+  __tsan_java_init((jptr)jheap, kHeapSize);
+  const int kBlockSize = 16;
+  __tsan_java_alloc((jptr)jheap, kBlockSize);
+  pthread_t th;
+  pthread_create(&th, 0, Thread, jheap);
+  *(int*)jheap = 43;
+  pthread_join(th, 0);
+  __tsan_java_free((jptr)jheap, kBlockSize);
+  return __tsan_java_fini();
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/java_race_move.cc b/lib/tsan/lit_tests/java_race_move.cc
new file mode 100644
index 0000000..bb63ea9
--- /dev/null
+++ b/lib/tsan/lit_tests/java_race_move.cc
@@ -0,0 +1,31 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include "java.h"
+
+jptr varaddr;
+jptr varaddr2;
+
+void *Thread(void *p) {
+  sleep(1);  // NOTE(review): presumably lets main() finish __tsan_java_move first - timing-based, confirm.
+  *(int*)varaddr2 = 42;  // Unsynchronized store to the moved address; the CHECK line expects a race report.
+  return 0;
+}
+
+int main() {
+  int const kHeapSize = 1024 * 1024;
+  void *jheap = malloc(kHeapSize);
+  __tsan_java_init((jptr)jheap, kHeapSize);
+  const int kBlockSize = 64;
+  int const kMove = 1024;
+  __tsan_java_alloc((jptr)jheap, kBlockSize);
+  varaddr = (jptr)jheap + 16;
+  varaddr2 = varaddr + kMove;
+  pthread_t th;
+  pthread_create(&th, 0, Thread, 0);
+  *(int*)varaddr = 43;
+  __tsan_java_move(varaddr, varaddr2, kBlockSize);
+  pthread_join(th, 0);
+  __tsan_java_free(varaddr2, kBlockSize);
+  return __tsan_java_fini();
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/java_rwlock.cc b/lib/tsan/lit_tests/java_rwlock.cc
new file mode 100644
index 0000000..1e8940a
--- /dev/null
+++ b/lib/tsan/lit_tests/java_rwlock.cc
@@ -0,0 +1,33 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include "java.h"
+
+jptr varaddr;
+jptr lockaddr;
+
+void *Thread(void *p) {
+  __tsan_java_mutex_read_lock(lockaddr);  // Reader side; main() takes the same lock with __tsan_java_mutex_lock.
+  *(int*)varaddr = 42;  // Store made while holding the read lock; CHECK-NOT expects no race report.
+  __tsan_java_mutex_read_unlock(lockaddr);
+  return 0;
+}
+
+int main() {
+  int const kHeapSize = 1024 * 1024;
+  void *jheap = malloc(kHeapSize);
+  __tsan_java_init((jptr)jheap, kHeapSize);
+  const int kBlockSize = 16;
+  __tsan_java_alloc((jptr)jheap, kBlockSize);
+  varaddr = (jptr)jheap;
+  lockaddr = (jptr)jheap + 8;
+  pthread_t th;
+  pthread_create(&th, 0, Thread, 0);
+  __tsan_java_mutex_lock(lockaddr);
+  *(int*)varaddr = 43;
+  __tsan_java_mutex_unlock(lockaddr);
+  pthread_join(th, 0);
+  __tsan_java_free((jptr)jheap, kBlockSize);
+  printf("OK\n");
+  return __tsan_java_fini();
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/mutex_destroy_locked.cc b/lib/tsan/lit_tests/mutex_destroy_locked.cc
index 8523f55..27a0424 100644
--- a/lib/tsan/lit_tests/mutex_destroy_locked.cc
+++ b/lib/tsan/lit_tests/mutex_destroy_locked.cc
@@ -2,19 +2,11 @@
 #include <pthread.h>
 #include <unistd.h>
 
-void *Thread(void *p) {
-  pthread_mutex_lock((pthread_mutex_t*)p);
-  return 0;
-}
-
 int main() {
   pthread_mutex_t m;
   pthread_mutex_init(&m, 0);
-  pthread_t t;
-  pthread_create(&t, 0, Thread, &m);
-  sleep(1);
+  pthread_mutex_lock(&m);
   pthread_mutex_destroy(&m);
-  pthread_join(t, 0);
   return 0;
 }
 
@@ -23,7 +15,8 @@
 // CHECK:     #1 main
 // CHECK:   and:
 // CHECK:     #0 pthread_mutex_lock
-// CHECK:     #1 Thread
+// CHECK:     #1 main
 // CHECK:   Mutex {{.*}} created at:
 // CHECK:     #0 pthread_mutex_init
 // CHECK:     #1 main
+// CHECK: SUMMARY: ThreadSanitizer: destroy of a locked mutex{{.*}}main
diff --git a/lib/tsan/lit_tests/mutexset1.cc b/lib/tsan/lit_tests/mutexset1.cc
index e27e056..f32a770 100644
--- a/lib/tsan/lit_tests/mutexset1.cc
+++ b/lib/tsan/lit_tests/mutexset1.cc
@@ -20,6 +20,13 @@
 }
 
 int main() {
+  // CHECK: WARNING: ThreadSanitizer: data race
+  // CHECK:   Write of size 4 at {{.*}} by thread T1
+  // CHECK:                         (mutexes: write [[M1:M[0-9]+]]):
+  // CHECK:   Previous write of size 4 at {{.*}} by thread T2:
+  // CHECK:   Mutex [[M1]] created at:
+  // CHECK:     #0 pthread_mutex_init
+  // CHECK:     #1 main {{.*}}/mutexset1.cc:[[@LINE+1]]
   pthread_mutex_init(&mtx, 0);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
@@ -28,12 +35,3 @@
   pthread_join(t[1], NULL);
   pthread_mutex_destroy(&mtx);
 }
-
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 4 at {{.*}} by thread T1
-// CHECK:                         (mutexes: write [[M1:M[0-9]+]]):
-// CHECK:   Previous write of size 4 at {{.*}} by thread T2:
-// CHECK:   Mutex [[M1]] created at:
-// CHECK:     #0 pthread_mutex_init
-// CHECK:     #1 main {{.*}}/mutexset1.cc:23
-
diff --git a/lib/tsan/lit_tests/mutexset2.cc b/lib/tsan/lit_tests/mutexset2.cc
index 8c85043..15d2303 100644
--- a/lib/tsan/lit_tests/mutexset2.cc
+++ b/lib/tsan/lit_tests/mutexset2.cc
@@ -20,6 +20,13 @@
 }
 
 int main() {
+  // CHECK: WARNING: ThreadSanitizer: data race
+  // CHECK:   Write of size 4 at {{.*}} by thread T2:
+  // CHECK:   Previous write of size 4 at {{.*}} by thread T1
+  // CHECK:                     (mutexes: write [[M1:M[0-9]+]]):
+  // CHECK:   Mutex [[M1]] created at:
+  // CHECK:     #0 pthread_mutex_init
+  // CHECK:     #1 main {{.*}}/mutexset2.cc:[[@LINE+1]]
   pthread_mutex_init(&mtx, 0);
   pthread_t t[2];
   pthread_create(&t[0], NULL, Thread1, NULL);
@@ -28,12 +35,3 @@
   pthread_join(t[1], NULL);
   pthread_mutex_destroy(&mtx);
 }
-
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 4 at {{.*}} by thread T2:
-// CHECK:   Previous write of size 4 at {{.*}} by thread T1
-// CHECK:                     (mutexes: write [[M1:M[0-9]+]]):
-// CHECK:   Mutex [[M1]] created at:
-// CHECK:     #0 pthread_mutex_init
-// CHECK:     #1 main {{.*}}/mutexset2.cc:23
-
diff --git a/lib/tsan/lit_tests/mutexset3.cc b/lib/tsan/lit_tests/mutexset3.cc
index 63123f8..6ac7ad1 100644
--- a/lib/tsan/lit_tests/mutexset3.cc
+++ b/lib/tsan/lit_tests/mutexset3.cc
@@ -23,6 +23,16 @@
 }
 
 int main() {
+  // CHECK: WARNING: ThreadSanitizer: data race
+  // CHECK: Write of size 4 at {{.*}} by thread T1
+  // CHECK:               (mutexes: write [[M1:M[0-9]+]], write [[M2:M[0-9]+]]):
+  // CHECK:   Previous write of size 4 at {{.*}} by thread T2:
+  // CHECK:   Mutex [[M1]] created at:
+  // CHECK:     #0 pthread_mutex_init
+  // CHECK:     #1 main {{.*}}/mutexset3.cc:[[@LINE+4]]
+  // CHECK:   Mutex [[M2]] created at:
+  // CHECK:     #0 pthread_mutex_init
+  // CHECK:     #1 main {{.*}}/mutexset3.cc:[[@LINE+2]]
   pthread_mutex_init(&mtx1, 0);
   pthread_mutex_init(&mtx2, 0);
   pthread_t t[2];
@@ -33,15 +43,3 @@
   pthread_mutex_destroy(&mtx1);
   pthread_mutex_destroy(&mtx2);
 }
-
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK: Write of size 4 at {{.*}} by thread T1
-// CHECK:               (mutexes: write [[M1:M[0-9]+]], write [[M2:M[0-9]+]]):
-// CHECK:   Previous write of size 4 at {{.*}} by thread T2:
-// CHECK:   Mutex [[M1]] created at:
-// CHECK:     #0 pthread_mutex_init
-// CHECK:     #1 main {{.*}}/mutexset3.cc:26
-// CHECK:   Mutex [[M2]] created at:
-// CHECK:     #0 pthread_mutex_init
-// CHECK:     #1 main {{.*}}/mutexset3.cc:27
-
diff --git a/lib/tsan/lit_tests/mutexset4.cc b/lib/tsan/lit_tests/mutexset4.cc
index 68ed475..75684cf 100644
--- a/lib/tsan/lit_tests/mutexset4.cc
+++ b/lib/tsan/lit_tests/mutexset4.cc
@@ -23,6 +23,16 @@
 }
 
 int main() {
+  // CHECK: WARNING: ThreadSanitizer: data race
+  // CHECK:   Write of size 4 at {{.*}} by thread T2:
+  // CHECK:   Previous write of size 4 at {{.*}} by thread T1
+  // CHECK:                 (mutexes: write [[M1:M[0-9]+]], write [[M2:M[0-9]+]]):
+  // CHECK:   Mutex [[M1]] created at:
+  // CHECK:     #0 pthread_mutex_init
+  // CHECK:     #1 main {{.*}}/mutexset4.cc:[[@LINE+4]]
+  // CHECK:   Mutex [[M2]] created at:
+  // CHECK:     #0 pthread_mutex_init
+  // CHECK:     #1 main {{.*}}/mutexset4.cc:[[@LINE+2]]
   pthread_mutex_init(&mtx1, 0);
   pthread_mutex_init(&mtx2, 0);
   pthread_t t[2];
@@ -33,15 +43,3 @@
   pthread_mutex_destroy(&mtx1);
   pthread_mutex_destroy(&mtx2);
 }
-
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 4 at {{.*}} by thread T2:
-// CHECK:   Previous write of size 4 at {{.*}} by thread T1
-// CHECK:                 (mutexes: write [[M1:M[0-9]+]], write [[M2:M[0-9]+]]):
-// CHECK:   Mutex [[M1]] created at:
-// CHECK:     #0 pthread_mutex_init
-// CHECK:     #1 main {{.*}}/mutexset4.cc:26
-// CHECK:   Mutex [[M2]] created at:
-// CHECK:     #0 pthread_mutex_init
-// CHECK:     #1 main {{.*}}/mutexset4.cc:27
-
diff --git a/lib/tsan/lit_tests/mutexset5.cc b/lib/tsan/lit_tests/mutexset5.cc
index 85fdadd..6e75810 100644
--- a/lib/tsan/lit_tests/mutexset5.cc
+++ b/lib/tsan/lit_tests/mutexset5.cc
@@ -23,6 +23,17 @@
 }
 
 int main() {
+  // CHECK: WARNING: ThreadSanitizer: data race
+  // CHECK:   Write of size 4 at {{.*}} by thread T1
+  // CHECK:                              (mutexes: write [[M1:M[0-9]+]]):
+  // CHECK:   Previous write of size 4 at {{.*}} by thread T2
+  // CHECK:                              (mutexes: write [[M2:M[0-9]+]]):
+  // CHECK:   Mutex [[M1]] created at:
+  // CHECK:     #0 pthread_mutex_init
+  // CHECK:     #1 main {{.*}}/mutexset5.cc:[[@LINE+4]]
+  // CHECK:   Mutex [[M2]] created at:
+  // CHECK:     #0 pthread_mutex_init
+  // CHECK:     #1 main {{.*}}/mutexset5.cc:[[@LINE+2]]
   pthread_mutex_init(&mtx1, 0);
   pthread_mutex_init(&mtx2, 0);
   pthread_t t[2];
@@ -33,16 +44,3 @@
   pthread_mutex_destroy(&mtx1);
   pthread_mutex_destroy(&mtx2);
 }
-
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 4 at {{.*}} by thread T1
-// CHECK:                              (mutexes: write [[M1:M[0-9]+]]):
-// CHECK:   Previous write of size 4 at {{.*}} by thread T2
-// CHECK:                              (mutexes: write [[M2:M[0-9]+]]):
-// CHECK:   Mutex [[M1]] created at:
-// CHECK:     #0 pthread_mutex_init
-// CHECK:     #1 main {{.*}}/mutexset5.cc:26
-// CHECK:   Mutex [[M2]] created at:
-// CHECK:     #0 pthread_mutex_init
-// CHECK:     #1 main {{.*}}/mutexset5.cc:27
-
diff --git a/lib/tsan/lit_tests/mutexset6.cc b/lib/tsan/lit_tests/mutexset6.cc
index b28c21c..4b19a12 100644
--- a/lib/tsan/lit_tests/mutexset6.cc
+++ b/lib/tsan/lit_tests/mutexset6.cc
@@ -28,6 +28,17 @@
 }
 
 int main() {
+  // CHECK: WARNING: ThreadSanitizer: data race
+  // CHECK:   Write of size 4 at {{.*}} by thread T1
+  // CHECK:                          (mutexes: write [[M1:M[0-9]+]]):
+  // CHECK:   Previous write of size 4 at {{.*}} by thread T2
+  // CHECK:               (mutexes: write [[M2:M[0-9]+]], read [[M3:M[0-9]+]]):
+  // CHECK:   Mutex [[M1]] created at:
+  // CHECK:     #1 main {{.*}}/mutexset6.cc:[[@LINE+5]]
+  // CHECK:   Mutex [[M2]] created at:
+  // CHECK:     #1 main {{.*}}/mutexset6.cc:[[@LINE+4]]
+  // CHECK:   Mutex [[M3]] created at:
+  // CHECK:     #1 main {{.*}}/mutexset6.cc:[[@LINE+3]]
   pthread_mutex_init(&mtx1, 0);
   pthread_spin_init(&mtx2, 0);
   pthread_rwlock_init(&mtx3, 0);
@@ -40,16 +51,3 @@
   pthread_spin_destroy(&mtx2);
   pthread_rwlock_destroy(&mtx3);
 }
-
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 4 at {{.*}} by thread T1
-// CHECK:                          (mutexes: write [[M1:M[0-9]+]]):
-// CHECK:   Previous write of size 4 at {{.*}} by thread T2
-// CHECK:               (mutexes: write [[M2:M[0-9]+]], read [[M3:M[0-9]+]]):
-// CHECK:   Mutex [[M1]] created at:
-// CHECK:     #1 main {{.*}}/mutexset6.cc:31
-// CHECK:   Mutex [[M2]] created at:
-// CHECK:     #1 main {{.*}}/mutexset6.cc:32
-// CHECK:   Mutex [[M3]] created at:
-// CHECK:     #1 main {{.*}}/mutexset6.cc:33
-
diff --git a/lib/tsan/lit_tests/race_on_heap.cc b/lib/tsan/lit_tests/race_on_heap.cc
index 62987bf..35434ea 100644
--- a/lib/tsan/lit_tests/race_on_heap.cc
+++ b/lib/tsan/lit_tests/race_on_heap.cc
@@ -39,9 +39,9 @@
 // ...
 // CHECK: Location is heap block of size 99 at [[ADDR]] allocated by thread T1:
 // CHCEKL     #0 malloc
-// CHECK:     #1 alloc
-// CHECK:     #2 AllocThread
+// CHECK:     #{{1|2}} alloc
+// CHECK:     #{{2|3}} AllocThread
 // ...
-// CHECK:   Thread T1 (tid={{.*}}, finished) created at:
+// CHECK:   Thread T1 (tid={{.*}}, finished) created by main thread at:
 // CHECK:     #0 pthread_create
 // CHECK:     #1 main
diff --git a/lib/tsan/lit_tests/race_on_mutex.c b/lib/tsan/lit_tests/race_on_mutex.c
index de1c2d4..aff32f9 100644
--- a/lib/tsan/lit_tests/race_on_mutex.c
+++ b/lib/tsan/lit_tests/race_on_mutex.c
@@ -34,7 +34,7 @@
 }
 
 // CHECK:      WARNING: ThreadSanitizer: data race
-// CHECK-NEXT:   Read of size 1 at {{.*}} by thread T2:
+// CHECK-NEXT:   Atomic read of size 1 at {{.*}} by thread T2:
 // CHECK-NEXT:     #0 pthread_mutex_lock
 // CHECK-NEXT:     #1 Thread2{{.*}} {{.*}}race_on_mutex.c:20{{(:3)?}} ({{.*}})
 // CHECK:        Previous write of size 1 at {{.*}} by thread T1:
diff --git a/lib/tsan/lit_tests/race_on_mutex2.c b/lib/tsan/lit_tests/race_on_mutex2.c
new file mode 100644
index 0000000..84bef75
--- /dev/null
+++ b/lib/tsan/lit_tests/race_on_mutex2.c
@@ -0,0 +1,24 @@
+// RUN: %clang_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <unistd.h>
+
+void *Thread(void *x) {
+  pthread_mutex_lock((pthread_mutex_t*)x);
+  pthread_mutex_unlock((pthread_mutex_t*)x);
+  return 0;
+}
+
+int main() {
+  pthread_mutex_t Mtx;
+  pthread_mutex_init(&Mtx, 0);
+  pthread_t t;
+  pthread_create(&t, 0, Thread, &Mtx);
+  sleep(1);
+  pthread_mutex_destroy(&Mtx);
+  pthread_join(t, 0);
+  return 0;
+}
+
+// CHECK:      WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/race_on_read.cc b/lib/tsan/lit_tests/race_on_read.cc
new file mode 100644
index 0000000..7d22681
--- /dev/null
+++ b/lib/tsan/lit_tests/race_on_read.cc
@@ -0,0 +1,32 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int fd;
+char buf;
+
+void *Thread(void *x) {
+  read(fd, &buf, 1);
+  return NULL;
+}
+
+int main() {  // Starts two threads that read() into the same global buf; the CHECK lines expect a race.
+  fd = open("/dev/urandom", O_RDONLY);  // Never blocks; /dev/random may stall when entropy is low.
+  if (fd < 0) return 1;  // Without a readable fd the racing read() calls cannot be exercised.
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread, NULL);
+  pthread_create(&t[1], NULL, Thread, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  close(fd);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 1
+// CHECK:     #0 read
+// CHECK:   Previous write of size 1
+// CHECK:     #0 read
diff --git a/lib/tsan/lit_tests/race_on_write.cc b/lib/tsan/lit_tests/race_on_write.cc
new file mode 100644
index 0000000..f1b0bb1
--- /dev/null
+++ b/lib/tsan/lit_tests/race_on_write.cc
@@ -0,0 +1,39 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int fd;
+char buf;
+
+void *Thread1(void *x) {
+  buf = 1;
+  sleep(1);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  write(fd, &buf, 1);
+  return NULL;
+}
+
+int main() {
+  fd = open("/dev/null", O_WRONLY);
+  if (fd < 0) return 1;
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  sleep(1);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  close(fd);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Read of size 1
+// CHECK:     #0 write
+// CHECK:   Previous write of size 1
+// CHECK:     #0 Thread1
diff --git a/lib/tsan/lit_tests/race_with_finished_thread.cc b/lib/tsan/lit_tests/race_with_finished_thread.cc
index cc7834a..a267290 100644
--- a/lib/tsan/lit_tests/race_with_finished_thread.cc
+++ b/lib/tsan/lit_tests/race_with_finished_thread.cc
@@ -38,6 +38,6 @@
 // CHECK:   Previous write of size 4 at {{.*}} by thread T1:
 // CHECK:     #0 foobar
 // CHECK:     #1 Thread1
-// CHECK:   Thread T1 (tid={{.*}}, finished) created at:
+// CHECK:   Thread T1 (tid={{.*}}, finished) created by main thread at:
 // CHECK:     #0 pthread_create
 // CHECK:     #1 main
diff --git a/lib/tsan/lit_tests/signal_errno.cc b/lib/tsan/lit_tests/signal_errno.cc
new file mode 100644
index 0000000..8181555
--- /dev/null
+++ b/lib/tsan/lit_tests/signal_errno.cc
@@ -0,0 +1,43 @@
+// RUN: %clang_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+
+pthread_t mainth;
+volatile int done;
+
+static void MyHandler(int, siginfo_t *s, void *c) {  // SIGPROF handler installed by main().
+  errno = 1;  // Overwrites errno in the handler; the CHECK lines expect "signal handler spoils errno".
+  done = 1;  // Lets main() leave its polling loop.
+}
+
+static void* sendsignal(void *p) {
+  pthread_kill(mainth, SIGPROF);
+  return 0;
+}
+
+int main() {
+  mainth = pthread_self();
+  struct sigaction act = {};
+  act.sa_sigaction = &MyHandler;
+  sigaction(SIGPROF, &act, 0);
+  pthread_t th;
+  pthread_create(&th, 0, sendsignal, 0);
+  while (done == 0) {
+    volatile char *p = (char*)malloc(1);
+    p[0] = 0;
+    free((void*)p);
+    pthread_yield();
+  }
+  pthread_join(th, 0);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: signal handler spoils errno
+// CHECK:     #0 MyHandler(int, siginfo{{(_t)?}}*, void*) {{.*}}signal_errno.cc
+// CHECK: SUMMARY: ThreadSanitizer: signal handler spoils errno{{.*}}MyHandler
+
diff --git a/lib/tsan/lit_tests/signal_malloc.cc b/lib/tsan/lit_tests/signal_malloc.cc
new file mode 100644
index 0000000..4dbc2f7
--- /dev/null
+++ b/lib/tsan/lit_tests/signal_malloc.cc
@@ -0,0 +1,26 @@
+// RUN: %clang_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+static void handler(int, siginfo_t*, void*) {
+  // CHECK: WARNING: ThreadSanitizer: signal-unsafe call inside of a signal
+  // CHECK:     #0 malloc
+  // CHECK:     #{{(1|2)}} handler(int, siginfo{{(_t)?}}*, void*) {{.*}}signal_malloc.cc:[[@LINE+2]]
+  // CHECK: SUMMARY: ThreadSanitizer: signal-unsafe call inside of a signal{{.*}}handler
+  volatile char *p = (char*)malloc(1);
+  p[0] = 0;
+  free((void*)p);
+}
+
+int main() {
+  struct sigaction act = {};
+  act.sa_sigaction = &handler;
+  sigaction(SIGPROF, &act, 0);
+  kill(getpid(), SIGPROF);
+  sleep(1);
+  return 0;
+}
+
diff --git a/lib/tsan/lit_tests/simple_race.cc b/lib/tsan/lit_tests/simple_race.cc
index ec29c92..99cf228 100644
--- a/lib/tsan/lit_tests/simple_race.cc
+++ b/lib/tsan/lit_tests/simple_race.cc
@@ -23,3 +23,4 @@
 }
 
 // CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/lib/tsan/lit_tests/simple_stack.c b/lib/tsan/lit_tests/simple_stack.c
index 6de20cb..4539cb7 100644
--- a/lib/tsan/lit_tests/simple_stack.c
+++ b/lib/tsan/lit_tests/simple_stack.c
@@ -56,11 +56,11 @@
 // CHECK-NEXT:     #0 foo2{{.*}} {{.*}}simple_stack.c:18{{(:26)?}} ({{.*}})
 // CHECK-NEXT:     #1 bar2{{.*}} {{.*}}simple_stack.c:23{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 Thread2{{.*}} {{.*}}simple_stack.c:33{{(:3)?}} ({{.*}})
-// CHECK:        Thread T1 (tid={{.*}}, running) created at:
+// CHECK:        Thread T1 (tid={{.*}}, running) created by main thread at:
 // CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
 // CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:38{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:43{{(:3)?}} ({{.*}})
-// CHECK:        Thread T2 ({{.*}}) created at:
+// CHECK:        Thread T2 ({{.*}}) created by main thread at:
 // CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
 // CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:38{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:44{{(:3)?}} ({{.*}})
diff --git a/lib/tsan/lit_tests/stack_race.cc b/lib/tsan/lit_tests/stack_race.cc
new file mode 100644
index 0000000..beeb573
--- /dev/null
+++ b/lib/tsan/lit_tests/stack_race.cc
@@ -0,0 +1,20 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stddef.h>
+
+void *Thread(void *a) {
+  *(int*)a = 43;
+  return 0;
+}
+
+int main() {
+  int Var = 42;
+  pthread_t t;
+  pthread_create(&t, 0, Thread, &Var);
+  Var = 43;
+  pthread_join(t, 0);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Location is stack of main thread.
+
diff --git a/lib/tsan/lit_tests/stack_race2.cc b/lib/tsan/lit_tests/stack_race2.cc
new file mode 100644
index 0000000..5bdf1bd
--- /dev/null
+++ b/lib/tsan/lit_tests/stack_race2.cc
@@ -0,0 +1,28 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stddef.h>
+#include <unistd.h>
+
+void *Thread2(void *a) {
+  *(int*)a = 43;
+  return 0;
+}
+
+void *Thread(void *a) {
+  int Var = 42;
+  pthread_t t;
+  pthread_create(&t, 0, Thread2, &Var);
+  Var = 42;
+  pthread_join(t, 0);
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  pthread_join(t, 0);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Location is stack of thread T1.
+
diff --git a/lib/tsan/lit_tests/static_init6.cc b/lib/tsan/lit_tests/static_init6.cc
new file mode 100644
index 0000000..c9099f9
--- /dev/null
+++ b/lib/tsan/lit_tests/static_init6.cc
@@ -0,0 +1,42 @@
+// RUN: %clangxx_tsan -static-libstdc++ -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sched.h>
+
+struct Cache {
+  int x;
+  explicit Cache(int x)
+    : x(x) {
+  }
+};
+
+void *AsyncInit(void *p) {
+  return new Cache((int)(long)p);
+}
+
+Cache *CreateCache() {
+  pthread_t t;
+  pthread_create(&t, 0, AsyncInit, (void*)(long)rand());
+  void *res;
+  pthread_join(t, &res);
+  return (Cache*)res;
+}
+
+void *Thread1(void *x) {  // Run by two threads from main(); both pass through the same static initializer.
+  static Cache *c = CreateCache();  // CreateCache() itself creates and joins a helper thread.
+  if (c->x >= RAND_MAX)  // NOTE(review): presumably only here to force a read of c->x - confirm.
+    exit(1);
+  return 0;
+}
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], 0, Thread1, 0);
+  pthread_create(&t[1], 0, Thread1, 0);
+  pthread_join(t[0], 0);
+  pthread_join(t[1], 0);
+  printf("PASS\n");
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/test_output.sh b/lib/tsan/lit_tests/test_output.sh
index 3798ff0..d21c9a7 100755
--- a/lib/tsan/lit_tests/test_output.sh
+++ b/lib/tsan/lit_tests/test_output.sh
@@ -4,13 +4,14 @@
 set -e # fail on any error
 
 ROOTDIR=$(dirname $0)/..
+BLACKLIST=$ROOTDIR/lit_tests/Helpers/blacklist.txt
 
 # Assuming clang is in path.
 CC=clang
 CXX=clang++
 
 # TODO: add testing for all of -O0...-O3
-CFLAGS="-fsanitize=thread -fPIE -O1 -g -fno-builtin -Wall"
+CFLAGS="-fsanitize=thread -fsanitize-blacklist=$BLACKLIST -fPIE -O1 -g -fno-builtin -Wall"
 LDFLAGS="-pie -lpthread -ldl $ROOTDIR/rtl/libtsan.a"
 
 test_file() {
@@ -21,10 +22,7 @@
   EXE=$SRC.exe
   $COMPILER $SRC $CFLAGS -c -o $OBJ
   $COMPILER $OBJ $LDFLAGS -o $EXE
-  RES=$(TSAN_OPTIONS="atexit_sleep_ms=0" $EXE 2>&1 || true)
-  if [ "$3" != "" ]; then
-    printf "%s\n" "$RES"
-  fi
+  RES=$($EXE 2>&1 || true)
   printf "%s\n" "$RES" | FileCheck $SRC
   if [ "$3" == "" ]; then
     rm -f $EXE $OBJ
diff --git a/lib/tsan/lit_tests/thread_leak3.c b/lib/tsan/lit_tests/thread_leak3.c
index c48219f..a39c93c 100644
--- a/lib/tsan/lit_tests/thread_leak3.c
+++ b/lib/tsan/lit_tests/thread_leak3.c
@@ -12,3 +12,4 @@
 }
 
 // CHECK: WARNING: ThreadSanitizer: thread leak
+// CHECK: SUMMARY: ThreadSanitizer: thread leak{{.*}}main
diff --git a/lib/tsan/lit_tests/thread_name.cc b/lib/tsan/lit_tests/thread_name.cc
index 04081c1..afb882f 100644
--- a/lib/tsan/lit_tests/thread_name.cc
+++ b/lib/tsan/lit_tests/thread_name.cc
@@ -15,7 +15,11 @@
 }
 
 void *Thread2(void *x) {
+#if defined(__linux__) && __GLIBC_PREREQ(2, 12)  // NOTE(review): needs __GLIBC_PREREQ to be defined (glibc headers) - confirm for other libcs.
+  pthread_setname_np(pthread_self(), "Thread2");  // Name the thread via pthread instead of the annotation.
+#else
   AnnotateThreadName(__FILE__, __LINE__, "Thread2");
+#endif
   Global--;
   return NULL;
 }
diff --git a/lib/tsan/lit_tests/tls_race.cc b/lib/tsan/lit_tests/tls_race.cc
new file mode 100644
index 0000000..bed6aaf
--- /dev/null
+++ b/lib/tsan/lit_tests/tls_race.cc
@@ -0,0 +1,19 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stddef.h>
+
+void *Thread(void *a) {
+  *(int*)a = 43;
+  return 0;
+}
+
+int main() {
+  static __thread int Var = 42;
+  pthread_t t;
+  pthread_create(&t, 0, Thread, &Var);
+  Var = 43;
+  pthread_join(t, 0);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Location is TLS of main thread.
diff --git a/lib/tsan/lit_tests/tls_race2.cc b/lib/tsan/lit_tests/tls_race2.cc
new file mode 100644
index 0000000..110abaa
--- /dev/null
+++ b/lib/tsan/lit_tests/tls_race2.cc
@@ -0,0 +1,28 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stddef.h>
+#include <unistd.h>
+
+void *Thread2(void *a) {
+  *(int*)a = 43;
+  return 0;
+}
+
+void *Thread(void *a) {
+  static __thread int Var = 42;
+  pthread_t t;
+  pthread_create(&t, 0, Thread2, &Var);
+  Var = 42;
+  pthread_join(t, 0);
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  pthread_join(t, 0);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Location is TLS of thread T1.
+
diff --git a/lib/tsan/lit_tests/tsan-vs-gvn.cc b/lib/tsan/lit_tests/tsan-vs-gvn.cc
new file mode 100644
index 0000000..40ae724
--- /dev/null
+++ b/lib/tsan/lit_tests/tsan-vs-gvn.cc
@@ -0,0 +1,38 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O2 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O3 %s -o %t && %t 2>&1 | FileCheck %s
+//
+// Check that load widening is not tsan-hostile.
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+
+struct {
+  int i;
+  char c1, c2, c3, c4;
+} S;
+
+int G;
+
+void *Thread1(void *x) {
+  G = S.c1 + S.c3;
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  S.c2 = 1;
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  memset(&S, 123, sizeof(S));
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  printf("PASS\n");
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: PASS
diff --git a/lib/tsan/lit_tests/user_fopen.cc b/lib/tsan/lit_tests/user_fopen.cc
new file mode 100644
index 0000000..794d598
--- /dev/null
+++ b/lib/tsan/lit_tests/user_fopen.cc
@@ -0,0 +1,34 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <stdio.h>
+#include <stdlib.h>
+
+// defined by tsan.
+extern "C" FILE *__interceptor_fopen(const char *file, const char *mode);
+extern "C" int __interceptor_fileno(FILE *f);
+
+extern "C" FILE *fopen(const char *file, const char *mode) {
+  static int first = 0;
+  if (__sync_lock_test_and_set(&first, 1) == 0)
+    printf("user fopen\n");
+  return __interceptor_fopen(file, mode);
+}
+
+extern "C" int fileno(FILE *f) {  // User-provided fileno() defined by the test itself.
+  static int first = 0;
+  if (__sync_lock_test_and_set(&first, 1) == 0)  // Print only on the first call.
+    printf("user fileno\n");
+  return 1;  // NOTE(review): constant result; __interceptor_fileno declared above is never called - confirm intent.
+}
+
+int main() {
+  FILE *f = fopen("/dev/zero", "r");
+  if (f) {
+    char buf;
+    fread(&buf, 1, 1, f);
+    fclose(f);
+  }
+}
+
+// CHECK: user fopen
+// CHECK-NOT: ThreadSanitizer
+
diff --git a/lib/tsan/lit_tests/user_malloc.cc b/lib/tsan/lit_tests/user_malloc.cc
new file mode 100644
index 0000000..0be6d54
--- /dev/null
+++ b/lib/tsan/lit_tests/user_malloc.cc
@@ -0,0 +1,27 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <stdio.h>
+
+// defined by tsan.
+extern "C" void *__interceptor_malloc(unsigned long size);
+extern "C" void __interceptor_free(void *p);
+
+extern "C" void *malloc(unsigned long size) {
+  static int first = 0;
+  if (__sync_lock_test_and_set(&first, 1) == 0)
+    printf("user malloc\n");
+  return __interceptor_malloc(size);
+}
+
+extern "C" void free(void *p) {
+  __interceptor_free(p);
+}
+
+int main() {
+  volatile char *p = (char*)malloc(10);
+  p[0] = 0;
+  free((void*)p);
+}
+
+// CHECK: user malloc
+// CHECK-NOT: ThreadSanitizer
+
diff --git a/lib/tsan/rtl/CMakeLists.txt b/lib/tsan/rtl/CMakeLists.txt
index f1a3d3b..f2a8533 100644
--- a/lib/tsan/rtl/CMakeLists.txt
+++ b/lib/tsan/rtl/CMakeLists.txt
@@ -1,10 +1,12 @@
 set(TSAN_SOURCES
   tsan_clock.cc
   tsan_flags.cc
+  tsan_fd.cc
   tsan_interceptors.cc
   tsan_interface_ann.cc
   tsan_interface_atomic.cc
   tsan_interface.cc
+  tsan_interface_java.cc
   tsan_md5.cc
   tsan_mman.cc
   tsan_mutex.cc
@@ -31,26 +33,17 @@
 
 set(TSAN_RUNTIME_LIBRARIES)
 # TSan is currently supported on 64-bit Linux only.
-if(CAN_TARGET_X86_64 AND UNIX AND NOT APPLE)
+if(CAN_TARGET_x86_64 AND UNIX AND NOT APPLE)
   set(TSAN_ASM_SOURCES tsan_rtl_amd64.S)
   # Pass ASM file directly to the C++ compiler.
   set_source_files_properties(${TSAN_ASM_SOURCES} PROPERTIES
-    LANGUAGE C
-    )
-  add_library(clang_rt.tsan-x86_64 STATIC
-    ${TSAN_SOURCES}
-    ${TSAN_ASM_SOURCES}
-    $<TARGET_OBJECTS:RTInterception.x86_64>
-    $<TARGET_OBJECTS:RTSanitizerCommon.x86_64>
-    )
-  set_target_compile_flags(clang_rt.tsan-x86_64
-    ${TSAN_CFLAGS} ${TARGET_X86_64_CFLAGS}
-    )
-  list(APPEND TSAN_RUNTIME_LIBRARIES clang_rt.tsan-x86_64)
-endif()
-
-if(TSAN_RUNTIME_LIBRARIES)
-  set_property(TARGET ${TSAN_RUNTIME_LIBRARIES} APPEND PROPERTY
-    COMPILE_DEFINITIONS ${TSAN_COMMON_DEFINITIONS})
-  add_clang_compiler_rt_libraries(${TSAN_RUNTIME_LIBRARIES})
+    LANGUAGE C)
+  set(arch "x86_64")  # Only arch handled here; reused for the target name and the object libraries below.
+  add_compiler_rt_static_runtime(clang_rt.tsan-${arch} ${arch}
+    SOURCES ${TSAN_SOURCES} ${TSAN_ASM_SOURCES}
+            $<TARGET_OBJECTS:RTInterception.${arch}>
+            $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+    CFLAGS ${TSAN_CFLAGS}
+    DEFS ${TSAN_COMMON_DEFINITIONS})
+  list(APPEND TSAN_RUNTIME_LIBRARIES clang_rt.tsan-${arch})  # Record the runtime target name.
 endif()
diff --git a/lib/tsan/rtl/Makefile.mk b/lib/tsan/rtl/Makefile.mk
index a6a7fc8..2687123 100644
--- a/lib/tsan/rtl/Makefile.mk
+++ b/lib/tsan/rtl/Makefile.mk
@@ -19,7 +19,7 @@
 # FIXME: use automatic dependencies?
 Dependencies := $(wildcard $(Dir)/*.h)
 Dependencies += $(wildcard $(Dir)/../../interception/*.h)
-Dependencies += $(wildcard $(Dir)/../../interception/mach_override/*.h)
+Dependencies += $(wildcard $(Dir)/../../sanitizer_common/*.h)
 
 # Define a convenience variable for all the tsan functions.
 TsanFunctions += $(Sources:%.cc=%) $(AsmSources:%.S=%)
diff --git a/lib/tsan/rtl/Makefile.old b/lib/tsan/rtl/Makefile.old
index 89ce832..f522ec6 100644
--- a/lib/tsan/rtl/Makefile.old
+++ b/lib/tsan/rtl/Makefile.old
@@ -1,6 +1,9 @@
-CXXFLAGS = -fPIE -g -Wall -Werror -fno-builtin -DTSAN_DEBUG=$(DEBUG)
+CXXFLAGS = -fPIE -g -Wall -Werror -fno-builtin -DTSAN_DEBUG=$(DEBUG) -DSANITIZER_DEBUG=$(DEBUG)
 ifeq ($(DEBUG), 0)
-	CXXFLAGS += -O3
+  CXXFLAGS += -O3
+endif
+ifeq ($(CXX), clang++)
+  CXXFLAGS+= -Wgnu
 endif
 
 # For interception. FIXME: move interception one level higher.
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index e0c0473..5c5ab9e 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -28,16 +28,19 @@
 const bool kGoMode = true;
 const bool kCppMode = false;
 const char *const kTsanOptionsEnv = "GORACE";
+// Go linker does not support weak symbols.
+#define CPP_WEAK
 #else
 const bool kGoMode = false;
 const bool kCppMode = true;
 const char *const kTsanOptionsEnv = "TSAN_OPTIONS";
+#define CPP_WEAK WEAK
 #endif
 
 const int kTidBits = 13;
 const unsigned kMaxTid = 1 << kTidBits;
 const unsigned kMaxTidInClock = kMaxTid * 2;  // This includes msb 'freed' bit.
-const int kClkBits = 43;
+const int kClkBits = 42;
 #ifndef TSAN_GO
 const int kShadowStackSize = 4 * 1024;
 const int kTraceStackSize = 256;
diff --git a/lib/tsan/rtl/tsan_fd.cc b/lib/tsan/rtl/tsan_fd.cc
new file mode 100644
index 0000000..b3cb884
--- /dev/null
+++ b/lib/tsan/rtl/tsan_fd.cc
@@ -0,0 +1,265 @@
+//===-- tsan_fd.cc --------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_fd.h"
+#include "tsan_rtl.h"
+#include <sanitizer_common/sanitizer_atomic.h>
+
+namespace __tsan {
+
+const int kTableSizeL1 = 1024;
+const int kTableSizeL2 = 1024;
+const int kTableSize = kTableSizeL1 * kTableSizeL2;
+
+struct FdSync {
+  atomic_uint64_t rc;
+};
+
+struct FdDesc {
+  FdSync *sync;
+  int creation_tid;
+  u32 creation_stack;
+};
+
+struct FdContext {
+  atomic_uintptr_t tab[kTableSizeL1];
+  // Addresses used for synchronization.
+  FdSync globsync;
+  FdSync filesync;
+  FdSync socksync;
+  u64 connectsync;
+};
+
+static FdContext fdctx;
+
+static FdSync *allocsync() {
+  FdSync *s = (FdSync*)internal_alloc(MBlockFD, sizeof(FdSync));
+  atomic_store(&s->rc, 1, memory_order_relaxed);
+  return s;
+}
+
+static FdSync *ref(FdSync *s) {
+  if (s && atomic_load(&s->rc, memory_order_relaxed) != (u64)-1)
+    atomic_fetch_add(&s->rc, 1, memory_order_relaxed);
+  return s;
+}
+
+static void unref(ThreadState *thr, uptr pc, FdSync *s) {
+  if (s && atomic_load(&s->rc, memory_order_relaxed) != (u64)-1) {
+    if (atomic_fetch_sub(&s->rc, 1, memory_order_acq_rel) == 1) {
+      CHECK_NE(s, &fdctx.globsync);
+      CHECK_NE(s, &fdctx.filesync);
+      CHECK_NE(s, &fdctx.socksync);
+      SyncVar *v = CTX()->synctab.GetAndRemove(thr, pc, (uptr)s);
+      if (v)
+        DestroyAndFree(v);
+      internal_free(s);
+    }
+  }
+}
+
+static FdDesc *fddesc(ThreadState *thr, uptr pc, int fd) {
+  CHECK_LT(fd, kTableSize);
+  atomic_uintptr_t *pl1 = &fdctx.tab[fd / kTableSizeL2];
+  uptr l1 = atomic_load(pl1, memory_order_consume);
+  if (l1 == 0) {
+    uptr size = kTableSizeL2 * sizeof(FdDesc);
+    void *p = internal_alloc(MBlockFD, size);
+    internal_memset(p, 0, size);
+    MemoryResetRange(thr, (uptr)&fddesc, (uptr)p, size);
+    if (atomic_compare_exchange_strong(pl1, &l1, (uptr)p, memory_order_acq_rel))
+      l1 = (uptr)p;
+    else
+      internal_free(p);
+  }
+  return &((FdDesc*)l1)[fd % kTableSizeL2];  // NOLINT
+}
+
+// s must be already ref'ed.
+static void init(ThreadState *thr, uptr pc, int fd, FdSync *s) {
+  FdDesc *d = fddesc(thr, pc, fd);
+  // As a matter of fact, we don't intercept all close calls.
+  // See e.g. libc __res_iclose().
+  if (d->sync) {
+    unref(thr, pc, d->sync);
+    d->sync = 0;
+  }
+  if (flags()->io_sync == 0) {
+    unref(thr, pc, s);
+  } else if (flags()->io_sync == 1) {
+    d->sync = s;
+  } else if (flags()->io_sync == 2) {
+    unref(thr, pc, s);
+    d->sync = &fdctx.globsync;
+  }
+  d->creation_tid = thr->tid;
+  d->creation_stack = CurrentStackId(thr, pc);
+  // To catch races between fd usage and open.
+  MemoryRangeImitateWrite(thr, pc, (uptr)d, 8);
+}
+
+void FdInit() {
+  atomic_store(&fdctx.globsync.rc, (u64)-1, memory_order_relaxed);
+  atomic_store(&fdctx.filesync.rc, (u64)-1, memory_order_relaxed);
+  atomic_store(&fdctx.socksync.rc, (u64)-1, memory_order_relaxed);
+}
+
+void FdOnFork(ThreadState *thr, uptr pc) {
+  // On fork() we need to reset all fd's, because the child is going to
+  // close them all, and that would cause races between previous read/write
+  // and the close.
+  for (int l1 = 0; l1 < kTableSizeL1; l1++) {
+    FdDesc *tab = (FdDesc*)atomic_load(&fdctx.tab[l1], memory_order_relaxed);
+    if (tab == 0)
+      continue;  // L2 tables are allocated on demand; later slots may be set.
+    for (int l2 = 0; l2 < kTableSizeL2; l2++) {
+      FdDesc *d = &tab[l2];
+      MemoryResetRange(thr, pc, (uptr)d, 8);
+    }
+  }
+}
+
+bool FdLocation(uptr addr, int *fd, int *tid, u32 *stack) {
+  for (int l1 = 0; l1 < kTableSizeL1; l1++) {
+    FdDesc *tab = (FdDesc*)atomic_load(&fdctx.tab[l1], memory_order_relaxed);
+    if (tab == 0)
+      continue;  // L2 tables are allocated on demand; later slots may be set.
+    if (addr >= (uptr)tab && addr < (uptr)(tab + kTableSizeL2)) {
+      int l2 = (addr - (uptr)tab) / sizeof(FdDesc);
+      FdDesc *d = &tab[l2];
+      *fd = l1 * kTableSizeL2 + l2;  // Inverse of the indexing in fddesc().
+      *tid = d->creation_tid;
+      *stack = d->creation_stack;
+      return true;
+    }
+  }
+  return false;
+}
+
+void FdAcquire(ThreadState *thr, uptr pc, int fd) {
+  FdDesc *d = fddesc(thr, pc, fd);
+  FdSync *s = d->sync;
+  DPrintf("#%d: FdAcquire(%d) -> %p\n", thr->tid, fd, s);
+  MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+  if (s)
+    Acquire(thr, pc, (uptr)s);
+}
+
+void FdRelease(ThreadState *thr, uptr pc, int fd) {
+  FdDesc *d = fddesc(thr, pc, fd);
+  FdSync *s = d->sync;
+  DPrintf("#%d: FdRelease(%d) -> %p\n", thr->tid, fd, s);
+  if (s)
+    Release(thr, pc, (uptr)s);
+  MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+}
+
+void FdAccess(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdAccess(%d)\n", thr->tid, fd);
+  FdDesc *d = fddesc(thr, pc, fd);
+  MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+}
+
+void FdClose(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdClose(%d)\n", thr->tid, fd);
+  FdDesc *d = fddesc(thr, pc, fd);
+  // To catch races between fd usage and close.
+  MemoryWrite(thr, pc, (uptr)d, kSizeLog8);
+  // We need to clear it, because if we do not intercept any call out there
+  // that creates fd, we will hit false positives.
+  MemoryResetRange(thr, pc, (uptr)d, 8);
+  unref(thr, pc, d->sync);
+  d->sync = 0;
+  d->creation_tid = 0;
+  d->creation_stack = 0;
+}
+
+void FdFileCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdFileCreate(%d)\n", thr->tid, fd);
+  init(thr, pc, fd, &fdctx.filesync);
+}
+
+void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd) {
+  DPrintf("#%d: FdDup(%d, %d)\n", thr->tid, oldfd, newfd);
+  // Ignore the case when user dups not yet connected socket.
+  FdDesc *od = fddesc(thr, pc, oldfd);
+  MemoryRead(thr, pc, (uptr)od, kSizeLog8);
+  FdClose(thr, pc, newfd);
+  init(thr, pc, newfd, ref(od->sync));
+}
+
+void FdPipeCreate(ThreadState *thr, uptr pc, int rfd, int wfd) {
+  DPrintf("#%d: FdCreatePipe(%d, %d)\n", thr->tid, rfd, wfd);
+  FdSync *s = allocsync();
+  init(thr, pc, rfd, ref(s));
+  init(thr, pc, wfd, ref(s));
+  unref(thr, pc, s);
+}
+
+void FdEventCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdEventCreate(%d)\n", thr->tid, fd);
+  init(thr, pc, fd, allocsync());
+}
+
+void FdSignalCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdSignalCreate(%d)\n", thr->tid, fd);
+  init(thr, pc, fd, 0);
+}
+
+void FdInotifyCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdInotifyCreate(%d)\n", thr->tid, fd);
+  init(thr, pc, fd, 0);
+}
+
+void FdPollCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdPollCreate(%d)\n", thr->tid, fd);
+  init(thr, pc, fd, allocsync());
+}
+
+void FdSocketCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdSocketCreate(%d)\n", thr->tid, fd);
+  // It can be a UDP socket.
+  init(thr, pc, fd, &fdctx.socksync);
+}
+
+void FdSocketAccept(ThreadState *thr, uptr pc, int fd, int newfd) {
+  DPrintf("#%d: FdSocketAccept(%d, %d)\n", thr->tid, fd, newfd);
+  // Synchronize connect->accept.
+  Acquire(thr, pc, (uptr)&fdctx.connectsync);
+  init(thr, pc, newfd, &fdctx.socksync);
+}
+
+void FdSocketConnecting(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdSocketConnecting(%d)\n", thr->tid, fd);
+  // Synchronize connect->accept.
+  Release(thr, pc, (uptr)&fdctx.connectsync);
+}
+
+void FdSocketConnect(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdSocketConnect(%d)\n", thr->tid, fd);
+  init(thr, pc, fd, &fdctx.socksync);
+}
+
+uptr File2addr(char *path) {
+  (void)path;
+  static u64 addr;
+  return (uptr)&addr;
+}
+
+uptr Dir2addr(char *path) {
+  (void)path;
+  static u64 addr;
+  return (uptr)&addr;
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_fd.h b/lib/tsan/rtl/tsan_fd.h
new file mode 100644
index 0000000..979198e
--- /dev/null
+++ b/lib/tsan/rtl/tsan_fd.h
@@ -0,0 +1,65 @@
+//===-- tsan_fd.h -----------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// This file handles synchronization via IO.
+// People use IO for synchronization along the lines of:
+//
+// int X;
+// int client_socket;  // initialized elsewhere
+// int server_socket;  // initialized elsewhere
+//
+// Thread 1:
+// X = 42;
+// send(client_socket, ...);
+//
+// Thread 2:
+// if (recv(server_socket, ...) > 0)
+//   assert(X == 42);
+//
+// This file determines the scope of the file descriptor (pipe, socket,
+// all local files, etc) and executes acquire and release operations on
+// the scope as necessary.  Some scopes are very fine grained (e.g. pipe
+// operations synchronize only with operations on the same pipe), while
+// others are coarse-grained (e.g. all operations on local files synchronize
+// with each other).
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_FD_H
+#define TSAN_FD_H
+
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+void FdInit();
+void FdAcquire(ThreadState *thr, uptr pc, int fd);
+void FdRelease(ThreadState *thr, uptr pc, int fd);
+void FdAccess(ThreadState *thr, uptr pc, int fd);
+void FdClose(ThreadState *thr, uptr pc, int fd);
+void FdFileCreate(ThreadState *thr, uptr pc, int fd);
+void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd);
+void FdPipeCreate(ThreadState *thr, uptr pc, int rfd, int wfd);
+void FdEventCreate(ThreadState *thr, uptr pc, int fd);
+void FdSignalCreate(ThreadState *thr, uptr pc, int fd);
+void FdInotifyCreate(ThreadState *thr, uptr pc, int fd);
+void FdPollCreate(ThreadState *thr, uptr pc, int fd);
+void FdSocketCreate(ThreadState *thr, uptr pc, int fd);
+void FdSocketAccept(ThreadState *thr, uptr pc, int fd, int newfd);
+void FdSocketConnecting(ThreadState *thr, uptr pc, int fd);
+void FdSocketConnect(ThreadState *thr, uptr pc, int fd);
+bool FdLocation(uptr addr, int *fd, int *tid, u32 *stack);
+void FdOnFork(ThreadState *thr, uptr pc);
+
+uptr File2addr(char *path);
+uptr Dir2addr(char *path);
+
+}  // namespace __tsan
+
+#endif  // TSAN_FD_H
diff --git a/lib/tsan/rtl/tsan_flags.cc b/lib/tsan/rtl/tsan_flags.cc
index 25c370f..1ed04dd 100644
--- a/lib/tsan/rtl/tsan_flags.cc
+++ b/lib/tsan/rtl/tsan_flags.cc
@@ -45,6 +45,7 @@
   f->report_thread_leaks = true;
   f->report_destroy_locked = true;
   f->report_signal_unsafe = true;
+  f->report_atomic_races = true;
   f->force_seq_cst_atomics = false;
   f->strip_path_prefix = "";
   f->suppressions = "";
@@ -58,6 +59,7 @@
   f->running_on_valgrind = false;
   f->external_symbolizer_path = "";
   f->history_size = kGoMode ? 1 : 2;  // There are a lot of goroutines in Go.
+  f->io_sync = 1;
 
   // Let a frontend override.
   OverrideFlags(f);
@@ -71,6 +73,7 @@
   ParseFlag(env, &f->report_thread_leaks, "report_thread_leaks");
   ParseFlag(env, &f->report_destroy_locked, "report_destroy_locked");
   ParseFlag(env, &f->report_signal_unsafe, "report_signal_unsafe");
+  ParseFlag(env, &f->report_atomic_races, "report_atomic_races");
   ParseFlag(env, &f->force_seq_cst_atomics, "force_seq_cst_atomics");
   ParseFlag(env, &f->strip_path_prefix, "strip_path_prefix");
   ParseFlag(env, &f->suppressions, "suppressions");
@@ -83,6 +86,7 @@
   ParseFlag(env, &f->stop_on_start, "stop_on_start");
   ParseFlag(env, &f->external_symbolizer_path, "external_symbolizer_path");
   ParseFlag(env, &f->history_size, "history_size");
+  ParseFlag(env, &f->io_sync, "io_sync");
 
   if (!f->report_bugs) {
     f->report_thread_leaks = false;
@@ -95,6 +99,12 @@
            " (must be [0..7])\n");
     Die();
   }
+
+  if (f->io_sync < 0 || f->io_sync > 2) {
+    Printf("ThreadSanitizer: incorrect value for io_sync"
+           " (must be [0..2])\n");
+    Die();
+  }
 }
 
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_flags.h b/lib/tsan/rtl/tsan_flags.h
index 2fdcc2f..16849cd 100644
--- a/lib/tsan/rtl/tsan_flags.h
+++ b/lib/tsan/rtl/tsan_flags.h
@@ -43,6 +43,8 @@
   // Report violations of async signal-safety
   // (e.g. malloc() call from a signal handler).
   bool report_signal_unsafe;
+  // Report races between atomic and plain memory accesses.
+  bool report_atomic_races;
   // If set, all atomics are effectively sequentially consistent (seq_cst),
   // regardless of what user actually specified.
   bool force_seq_cst_atomics;
@@ -77,10 +79,15 @@
   // the amount of memory accesses, up to history_size=7 that amounts to
   // 4M memory accesses.  The default value is 2 (128K memory accesses).
   int history_size;
+  // Controls level of synchronization implied by IO operations.
+  // 0 - no synchronization
+  // 1 - reasonable level of synchronization (write->read)
+  // 2 - global synchronization of all IO operations
+  int io_sync;
 };
 
 Flags *flags();
 void InitializeFlags(Flags *flags, const char *env);
-}
+}  // namespace __tsan
 
 #endif  // TSAN_FLAGS_H
diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc
index dad29cf..25dfe9c 100644
--- a/lib/tsan/rtl/tsan_interceptors.cc
+++ b/lib/tsan/rtl/tsan_interceptors.cc
@@ -9,6 +9,8 @@
 //
 // This file is a part of ThreadSanitizer (TSan), a race detector.
 //
+// FIXME: move as many interceptors as possible into
+// sanitizer_common/sanitizer_common_interceptors.h
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_atomic.h"
@@ -20,6 +22,7 @@
 #include "tsan_platform.h"
 #include "tsan_rtl.h"
 #include "tsan_mman.h"
+#include "tsan_fd.h"
 
 using namespace __tsan;  // NOLINT
 
@@ -50,8 +53,12 @@
 extern "C" int sigfillset(sigset_t *set);
 extern "C" void *pthread_self();
 extern "C" void _exit(int status);
-extern "C" int __cxa_atexit(void (*func)(void *arg), void *arg, void *dso);
 extern "C" int *__errno_location();
+extern "C" int fileno_unlocked(void *stream);
+extern "C" void *__libc_malloc(uptr size);
+extern "C" void *__libc_calloc(uptr size, uptr n);
+extern "C" void *__libc_realloc(void *ptr, uptr size);
+extern "C" void __libc_free(void *ptr);
 const int PTHREAD_MUTEX_RECURSIVE = 1;
 const int PTHREAD_MUTEX_RECURSIVE_NP = 1;
 const int kPthreadAttrSize = 56;
@@ -120,16 +127,14 @@
   int pending_signal_count;
   SignalDesc pending_signals[kSigCount];
 };
-}
+}  // namespace __tsan
 
 static SignalContext *SigCtx(ThreadState *thr) {
   SignalContext *ctx = (SignalContext*)thr->signal_ctx;
   if (ctx == 0 && thr->is_alive) {
     ScopedInRtl in_rtl;
-    ctx = (SignalContext*)internal_alloc(
-        MBlockSignal, sizeof(*ctx));
-    MemoryResetRange(thr, 0, (uptr)ctx, sizeof(*ctx));
-    internal_memset(ctx, 0, sizeof(*ctx));
+    ctx = (SignalContext*)MmapOrDie(sizeof(*ctx), "SignalContext");
+    MemoryResetRange(thr, (uptr)&SigCtx, (uptr)ctx, sizeof(*ctx));
     thr->signal_ctx = ctx;
   }
   return ctx;
@@ -175,8 +180,8 @@
     StatInc(thr, StatInt_##func); \
     const uptr caller_pc = GET_CALLER_PC(); \
     ScopedInterceptor si(thr, #func, caller_pc); \
-    /* Subtract one from pc as we need current instruction address */ \
-    const uptr pc = __sanitizer::StackTrace::GetCurrentPc() - 1; \
+    const uptr pc = __sanitizer::StackTrace::GetPreviousInstructionPc( \
+        __sanitizer::StackTrace::GetCurrentPc()); \
     (void)pc; \
 /**/
 
@@ -238,12 +243,15 @@
 
   typedef void(*atexit_t)();
 
-  int atexit(ThreadState *thr, uptr pc, atexit_t f) {
+  int atexit(ThreadState *thr, uptr pc, bool is_on_exit,
+             atexit_t f, void *arg) {
     Lock l(&mtx_);
     if (pos_ == kMaxAtExit)
       return 1;
     Release(thr, pc, (uptr)this);
     stack_[pos_] = f;
+    args_[pos_] = arg;
+    is_on_exits_[pos_] = is_on_exit;
     pos_++;
     return 0;
   }
@@ -252,11 +260,15 @@
     CHECK_EQ(thr->in_rtl, 0);
     for (;;) {
       atexit_t f = 0;
+      void *arg = 0;
+      bool is_on_exit = false;
       {
         Lock l(&mtx_);
         if (pos_) {
           pos_--;
           f = stack_[pos_];
+          arg = args_[pos_];
+          is_on_exit = is_on_exits_[pos_];
           ScopedInRtl in_rtl;
           Acquire(thr, pc, (uptr)this);
         }
@@ -265,7 +277,10 @@
         break;
       DPrintf("#%d: executing atexit func %p\n", thr->tid, f);
       CHECK_EQ(thr->in_rtl, 0);
-      f();
+      if (is_on_exit)
+        ((void(*)(int status, void *arg))f)(0, arg);
+      else
+        ((void(*)(void *arg, void *dso))f)(arg, 0);
     }
   }
 
@@ -273,6 +288,8 @@
   static const int kMaxAtExit = 128;
   Mutex mtx_;
   atexit_t stack_[kMaxAtExit];
+  void *args_[kMaxAtExit];
+  bool is_on_exits_[kMaxAtExit];
   int pos_;
 };
 
@@ -282,18 +299,32 @@
   ThreadState * thr = cur_thread();
   uptr pc = 0;
   atexit_ctx->exit(thr, pc);
-  {
-    ScopedInRtl in_rtl;
-    DestroyAndFree(atexit_ctx);
-  }
   int status = Finalize(cur_thread());
   if (status)
     _exit(status);
 }
 
 TSAN_INTERCEPTOR(int, atexit, void (*f)()) {
+  if (cur_thread()->in_symbolizer)
+    return 0;
   SCOPED_TSAN_INTERCEPTOR(atexit, f);
-  return atexit_ctx->atexit(thr, pc, f);
+  return atexit_ctx->atexit(thr, pc, false, (void(*)())f, 0);
+}
+
+TSAN_INTERCEPTOR(int, on_exit, void(*f)(int, void*), void *arg) {
+  if (cur_thread()->in_symbolizer)
+    return 0;
+  SCOPED_TSAN_INTERCEPTOR(on_exit, f, arg);
+  return atexit_ctx->atexit(thr, pc, true, (void(*)())f, arg);
+}
+
+TSAN_INTERCEPTOR(int, __cxa_atexit, void (*f)(void *a), void *arg, void *dso) {
+  if (cur_thread()->in_symbolizer)
+    return 0;
+  SCOPED_TSAN_INTERCEPTOR(__cxa_atexit, f, arg, dso);
+  if (dso)
+    return REAL(__cxa_atexit)(f, arg, dso);
+  return atexit_ctx->atexit(thr, pc, false, (void(*)())f, arg);
 }
 
 TSAN_INTERCEPTOR(void, longjmp, void *env, int val) {
@@ -308,217 +339,9 @@
   Die();
 }
 
-enum FdType {
-  FdGlobal,  // Something we don't know about, global sync.
-  FdNone,  // Does not require any sync.
-  FdFile,
-  FdSock,
-  FdPipe,
-  FdEvent,  // see eventfd()
-  FdPoll
-};
-
-struct FdDesc {
-  FdType type;
-  u64 sync;
-};
-
-struct FdContext {
-  static const int kMaxFds = 10 * 1024;  // Everything else is synced globally.
-  FdDesc desc[kMaxFds];
-  // Addresses used for synchronization.
-  u64 fdglobal;
-  u64 fdfile;
-  u64 fdsock;
-  u64 fdpipe;
-  u64 fdpoll;
-  u64 fdevent;
-};
-
-static FdContext fdctx;
-
-static void FdInit() {
-  fdctx.desc[0].type = FdNone;
-  fdctx.desc[1].type = FdNone;
-  fdctx.desc[2].type = FdNone;
-}
-
-static void *FdAddr(int fd) {
-  if (fd >= FdContext::kMaxFds)
-    return &fdctx.fdglobal;
-  FdDesc *desc = &fdctx.desc[fd];
-  if (desc->type == FdNone)
-    return 0;
-  if (desc->type == FdGlobal)
-    return &fdctx.fdglobal;
-  if (desc->type == FdFile)
-    return &fdctx.fdfile;
-  if (desc->type == FdSock)
-    return &fdctx.fdsock;
-  if (desc->type == FdPipe)
-    return &fdctx.fdpipe;
-  if (desc->type == FdEvent)
-    return &fdctx.fdevent;
-  if (desc->type == FdPoll)
-    return &fdctx.fdpoll;
-  CHECK(0);
-  return 0;
-}
-
-static void FdAcquire(ThreadState *thr, uptr pc, int fd) {
-  void *addr = FdAddr(fd);
-  DPrintf("#%d: FdAcquire(%d) -> %p\n", thr->tid, fd, addr);
-  if (addr)
-    Acquire(thr, pc, (uptr)addr);
-  if (fd < FdContext::kMaxFds)
-    MemoryRead8Byte(thr, pc, (uptr)&fdctx.desc[fd].sync);
-}
-
-static void FdRelease(ThreadState *thr, uptr pc, int fd) {
-  void *addr = FdAddr(fd);
-  DPrintf("#%d: FdRelease(%d) -> %p\n", thr->tid, fd, addr);
-  if (addr)
-    Release(thr, pc, (uptr)addr);
-  if (fd < FdContext::kMaxFds)
-    MemoryRead8Byte(thr, pc, (uptr)&fdctx.desc[fd].sync);
-}
-
-static void FdClose(ThreadState *thr, uptr pc, int fd) {
-  if (fd >= FdContext::kMaxFds)
-    return;
-  FdDesc *desc = &fdctx.desc[fd];
-  SyncVar *s = CTX()->synctab.GetAndRemove(thr, pc, (uptr)&desc->sync);
-  if (s)
-    DestroyAndFree(s);
-  // FIXME(dvyukov): change to FdNone once we handle all fd operations.
-  desc->type = FdGlobal;
-  // To catch races between fd usage and close.
-  MemoryWrite8Byte(thr, pc, (uptr)&desc->sync);
-  // We need to clear it, because if we do not intercept any call out there
-  // that creates fd, we will hit false postives.
-  MemoryResetRange(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
-}
-
-static void FdFileCreate(ThreadState *thr, uptr pc, int fd) {
-  if (fd >= FdContext::kMaxFds)
-    return;
-  FdDesc *desc = &fdctx.desc[fd];
-  desc->type = FdFile;
-  // To catch races between fd usage and open.
-  MemoryRangeImitateWrite(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
-}
-
-static void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd) {
-  if (oldfd >= FdContext::kMaxFds || newfd >= FdContext::kMaxFds) {
-    if (oldfd < FdContext::kMaxFds) {
-      // FIXME(dvyukov): here we lose old sync object associated with the fd,
-      // this can lead to false positives.
-      FdDesc *odesc = &fdctx.desc[oldfd];
-      odesc->type = FdGlobal;
-    }
-    if (newfd < FdContext::kMaxFds) {
-      FdClose(thr, pc, newfd);
-      FdDesc *ndesc = &fdctx.desc[newfd];
-      ndesc->type = FdGlobal;
-    }
-    return;
-  }
-
-  FdClose(thr, pc, newfd);
-  FdDesc *ndesc = &fdctx.desc[newfd];
-  ndesc->type = FdFile;
-  // To catch races between fd usage and open.
-  MemoryRangeImitateWrite(thr, pc, (uptr)&ndesc->sync, sizeof(ndesc->sync));
-}
-
-static void FdPipeCreate(ThreadState *thr, uptr pc, int rfd, int wfd) {
-  if (rfd >= FdContext::kMaxFds || wfd >= FdContext::kMaxFds) {
-    if (rfd < FdContext::kMaxFds) {
-      FdDesc *rdesc = &fdctx.desc[rfd];
-      rdesc->type = FdGlobal;
-    }
-    if (wfd < FdContext::kMaxFds) {
-      FdDesc *wdesc = &fdctx.desc[wfd];
-      wdesc->type = FdGlobal;
-    }
-    return;
-  }
-
-  FdDesc *rdesc = &fdctx.desc[rfd];
-  rdesc->type = FdPipe;
-  // To catch races between fd usage and open.
-  MemoryRangeImitateWrite(thr, pc, (uptr)&rdesc->sync, sizeof(rdesc->sync));
-
-  FdDesc *wdesc = &fdctx.desc[wfd];
-  wdesc->type = FdPipe;
-  // To catch races between fd usage and open.
-  MemoryRangeImitateWrite(thr, pc, (uptr)&wdesc->sync, sizeof(rdesc->sync));
-
-  DPrintf("#%d: FdCreatePipe(%d, %d)\n", thr->tid, rfd, wfd);
-}
-
-static void FdEventCreate(ThreadState *thr, uptr pc, int fd) {
-  if (fd >= FdContext::kMaxFds)
-    return;
-  FdDesc *desc = &fdctx.desc[fd];
-  desc->type = FdEvent;
-  // To catch races between fd usage and open.
-  MemoryRangeImitateWrite(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
-}
-
-static void FdPollCreate(ThreadState *thr, uptr pc, int fd) {
-  if (fd >= FdContext::kMaxFds)
-    return;
-  FdDesc *desc = &fdctx.desc[fd];
-  desc->type = FdPoll;
-  // To catch races between fd usage and open.
-  MemoryRangeImitateWrite(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
-}
-
-static void FdSocketCreate(ThreadState *thr, uptr pc, int fd) {
-  if (fd >= FdContext::kMaxFds)
-    return;
-  FdDesc *desc = &fdctx.desc[fd];
-  // It can be UDP socket, let's assume they are not used for synchronization.
-  desc->type = FdNone;
-  // To catch races between fd usage and open.
-  MemoryRangeImitateWrite(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
-}
-
-static void FdSocketAccept(ThreadState *thr, uptr pc, int fd, int newfd) {
-  if (fd < FdContext::kMaxFds) {
-    FdDesc *desc = &fdctx.desc[fd];
-    desc->type = FdNone;
-    MemoryRead8Byte(thr, pc, (uptr)&desc->sync);
-  }
-  if (newfd < FdContext::kMaxFds) {
-    FdDesc *desc = &fdctx.desc[newfd];
-    desc->type = FdSock;
-    MemoryWrite8Byte(thr, pc, (uptr)&desc->sync);
-  }
-}
-
-static void FdSocketConnect(ThreadState *thr, uptr pc, int fd) {
-  if (fd >= FdContext::kMaxFds)
-    return;
-  FdDesc *desc = &fdctx.desc[fd];
-  desc->type = FdSock;
-  MemoryWrite8Byte(thr, pc, (uptr)&desc->sync);
-}
-
-static uptr file2addr(char *path) {
-  (void)path;
-  static u64 addr;
-  return (uptr)&addr;
-}
-
-static uptr dir2addr(char *path) {
-  (void)path;
-  static u64 addr;
-  return (uptr)&addr;
-}
-
 TSAN_INTERCEPTOR(void*, malloc, uptr size) {
+  if (cur_thread()->in_symbolizer)
+    return __libc_malloc(size);
   void *p = 0;
   {
     SCOPED_INTERCEPTOR_RAW(malloc, size);
@@ -534,6 +357,9 @@
 }
 
 TSAN_INTERCEPTOR(void*, calloc, uptr size, uptr n) {
+  if (cur_thread()->in_symbolizer)
+    return __libc_calloc(size, n);
+  if (__sanitizer::CallocShouldReturnNullDueToOverflow(size, n)) return 0;
   void *p = 0;
   {
     SCOPED_INTERCEPTOR_RAW(calloc, size, n);
@@ -545,6 +371,8 @@
 }
 
 TSAN_INTERCEPTOR(void*, realloc, void *p, uptr size) {
+  if (cur_thread()->in_symbolizer)
+    return __libc_realloc(p, size);
   if (p)
     invoke_free_hook(p);
   {
@@ -558,6 +386,8 @@
 TSAN_INTERCEPTOR(void, free, void *p) {
   if (p == 0)
     return;
+  if (cur_thread()->in_symbolizer)
+    return __libc_free(p);
   invoke_free_hook(p);
   SCOPED_INTERCEPTOR_RAW(free, p);
   user_free(thr, pc, p);
@@ -566,12 +396,21 @@
 TSAN_INTERCEPTOR(void, cfree, void *p) {
   if (p == 0)
     return;
+  if (cur_thread()->in_symbolizer)
+    return __libc_free(p);
   invoke_free_hook(p);
   SCOPED_INTERCEPTOR_RAW(cfree, p);
   user_free(thr, pc, p);
 }
 
+TSAN_INTERCEPTOR(uptr, malloc_usable_size, void *p) {
+  SCOPED_INTERCEPTOR_RAW(malloc_usable_size, p);
+  return user_alloc_usable_size(thr, pc, p);
+}
+
 #define OPERATOR_NEW_BODY(mangled_name) \
+  if (cur_thread()->in_symbolizer) \
+    return __libc_malloc(size); \
   void *p = 0; \
   {  \
     SCOPED_INTERCEPTOR_RAW(mangled_name, size); \
@@ -595,6 +434,8 @@
 
 #define OPERATOR_DELETE_BODY(mangled_name) \
   if (ptr == 0) return;  \
+  if (cur_thread()->in_symbolizer) \
+    return __libc_free(ptr); \
   invoke_free_hook(ptr);  \
   SCOPED_INTERCEPTOR_RAW(mangled_name, ptr);  \
   user_free(thr, pc, ptr);
@@ -759,6 +600,8 @@
     return MAP_FAILED;
   void *res = REAL(mmap)(addr, sz, prot, flags, fd, off);
   if (res != MAP_FAILED) {
+    if (fd > 0)
+      FdAccess(thr, pc, fd);
     MemoryResetRange(thr, pc, (uptr)res, sz);
   }
   return res;
@@ -771,6 +614,8 @@
     return MAP_FAILED;
   void *res = REAL(mmap64)(addr, sz, prot, flags, fd, off);
   if (res != MAP_FAILED) {
+    if (fd > 0)
+      FdAccess(thr, pc, fd);
     MemoryResetRange(thr, pc, (uptr)res, sz);
   }
   return res;
@@ -848,7 +693,7 @@
     SignalContext *sctx = thr->signal_ctx;
     if (sctx) {
       thr->signal_ctx = 0;
-      internal_free(sctx);
+      UnmapOrDie(sctx, sizeof(*sctx));
     }
   }
 }
@@ -1122,11 +967,15 @@
   return res;
 }
 
+// libpthread.so contains several versions of pthread_cond_init symbol.
+// When we just dlsym() it, we get the wrong (old) version.
+/*
 TSAN_INTERCEPTOR(int, pthread_cond_init, void *c, void *a) {
   SCOPED_TSAN_INTERCEPTOR(pthread_cond_init, c, a);
   int res = REAL(pthread_cond_init)(c, a);
   return res;
 }
+*/
 
 TSAN_INTERCEPTOR(int, pthread_cond_destroy, void *c) {
   SCOPED_TSAN_INTERCEPTOR(pthread_cond_destroy, c);
@@ -1164,14 +1013,14 @@
 
 TSAN_INTERCEPTOR(int, pthread_barrier_init, void *b, void *a, unsigned count) {
   SCOPED_TSAN_INTERCEPTOR(pthread_barrier_init, b, a, count);
-  MemoryWrite1Byte(thr, pc, (uptr)b);
+  MemoryWrite(thr, pc, (uptr)b, kSizeLog1);
   int res = REAL(pthread_barrier_init)(b, a, count);
   return res;
 }
 
 TSAN_INTERCEPTOR(int, pthread_barrier_destroy, void *b) {
   SCOPED_TSAN_INTERCEPTOR(pthread_barrier_destroy, b);
-  MemoryWrite1Byte(thr, pc, (uptr)b);
+  MemoryWrite(thr, pc, (uptr)b, kSizeLog1);
   int res = REAL(pthread_barrier_destroy)(b);
   return res;
 }
@@ -1179,9 +1028,9 @@
 TSAN_INTERCEPTOR(int, pthread_barrier_wait, void *b) {
   SCOPED_TSAN_INTERCEPTOR(pthread_barrier_wait, b);
   Release(thr, pc, (uptr)b);
-  MemoryRead1Byte(thr, pc, (uptr)b);
+  MemoryRead(thr, pc, (uptr)b, kSizeLog1);
   int res = REAL(pthread_barrier_wait)(b);
-  MemoryRead1Byte(thr, pc, (uptr)b);
+  MemoryRead(thr, pc, (uptr)b, kSizeLog1);
   if (res == 0 || res == PTHREAD_BARRIER_SERIAL_THREAD) {
     Acquire(thr, pc, (uptr)b);
   }
@@ -1268,6 +1117,74 @@
   return res;
 }
 
+TSAN_INTERCEPTOR(int, __xstat, int version, const char *path, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__xstat, version, path, buf);
+  return REAL(__xstat)(version, path, buf);
+}
+
+TSAN_INTERCEPTOR(int, stat, const char *path, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__xstat, 0, path, buf);
+  return REAL(__xstat)(0, path, buf);
+}
+
+TSAN_INTERCEPTOR(int, __xstat64, int version, const char *path, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__xstat64, version, path, buf);
+  return REAL(__xstat64)(version, path, buf);
+}
+
+TSAN_INTERCEPTOR(int, stat64, const char *path, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__xstat64, 0, path, buf);
+  return REAL(__xstat64)(0, path, buf);
+}
+
+TSAN_INTERCEPTOR(int, __lxstat, int version, const char *path, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__lxstat, version, path, buf);
+  return REAL(__lxstat)(version, path, buf);
+}
+
+TSAN_INTERCEPTOR(int, lstat, const char *path, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__lxstat, 0, path, buf);
+  return REAL(__lxstat)(0, path, buf);
+}
+
+TSAN_INTERCEPTOR(int, __lxstat64, int version, const char *path, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__lxstat64, version, path, buf);
+  return REAL(__lxstat64)(version, path, buf);
+}
+
+TSAN_INTERCEPTOR(int, lstat64, const char *path, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__lxstat64, 0, path, buf);
+  return REAL(__lxstat64)(0, path, buf);
+}
+
+TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__fxstat, version, fd, buf);
+  if (fd > 0)
+    FdAccess(thr, pc, fd);
+  return REAL(__fxstat)(version, fd, buf);
+}
+
+TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__fxstat, 0, fd, buf);
+  if (fd > 0)
+    FdAccess(thr, pc, fd);
+  return REAL(__fxstat)(0, fd, buf);
+}
+
+TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf);
+  if (fd > 0)
+    FdAccess(thr, pc, fd);
+  return REAL(__fxstat64)(version, fd, buf);
+}
+
+TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__fxstat64, 0, fd, buf);
+  if (fd > 0)
+    FdAccess(thr, pc, fd);
+  return REAL(__fxstat64)(0, fd, buf);
+}
+
 TSAN_INTERCEPTOR(int, open, const char *name, int flags, int mode) {
   SCOPED_TSAN_INTERCEPTOR(open, name, flags, mode);
   int fd = REAL(open)(name, flags, mode);
@@ -1276,6 +1193,14 @@
   return fd;
 }
 
+TSAN_INTERCEPTOR(int, open64, const char *name, int flags, int mode) {
+  SCOPED_TSAN_INTERCEPTOR(open64, name, flags, mode);
+  int fd = REAL(open64)(name, flags, mode);
+  if (fd >= 0)
+    FdFileCreate(thr, pc, fd);
+  return fd;
+}
+
 TSAN_INTERCEPTOR(int, creat, const char *name, int mode) {
   SCOPED_TSAN_INTERCEPTOR(creat, name, mode);
   int fd = REAL(creat)(name, mode);
@@ -1284,10 +1209,18 @@
   return fd;
 }
 
+TSAN_INTERCEPTOR(int, creat64, const char *name, int mode) {
+  SCOPED_TSAN_INTERCEPTOR(creat64, name, mode);
+  int fd = REAL(creat64)(name, mode);
+  if (fd >= 0)
+    FdFileCreate(thr, pc, fd);
+  return fd;
+}
+
 TSAN_INTERCEPTOR(int, dup, int oldfd) {
   SCOPED_TSAN_INTERCEPTOR(dup, oldfd);
   int newfd = REAL(dup)(oldfd);
-  if (newfd >= 0 && newfd != oldfd)
+  if (oldfd >= 0 && newfd >= 0 && newfd != oldfd)
     FdDup(thr, pc, oldfd, newfd);
   return newfd;
 }
@@ -1295,7 +1228,7 @@
 TSAN_INTERCEPTOR(int, dup2, int oldfd, int newfd) {
   SCOPED_TSAN_INTERCEPTOR(dup2, oldfd, newfd);
   int newfd2 = REAL(dup2)(oldfd, newfd);
-  if (newfd2 >= 0 && newfd2 != oldfd)
+  if (oldfd >= 0 && newfd2 >= 0 && newfd2 != oldfd)
     FdDup(thr, pc, oldfd, newfd2);
   return newfd2;
 }
@@ -1303,7 +1236,7 @@
 TSAN_INTERCEPTOR(int, dup3, int oldfd, int newfd, int flags) {
   SCOPED_TSAN_INTERCEPTOR(dup3, oldfd, newfd, flags);
   int newfd2 = REAL(dup3)(oldfd, newfd, flags);
-  if (newfd2 >= 0 && newfd2 != oldfd)
+  if (oldfd >= 0 && newfd2 >= 0 && newfd2 != oldfd)
     FdDup(thr, pc, oldfd, newfd2);
   return newfd2;
 }
@@ -1316,6 +1249,32 @@
   return fd;
 }
 
+TSAN_INTERCEPTOR(int, signalfd, int fd, void *mask, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(signalfd, fd, mask, flags);
+  if (fd >= 0)
+    FdClose(thr, pc, fd);
+  fd = REAL(signalfd)(fd, mask, flags);
+  if (fd >= 0)
+    FdSignalCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, inotify_init, int fake) {
+  SCOPED_TSAN_INTERCEPTOR(inotify_init, fake);
+  int fd = REAL(inotify_init)(fake);
+  if (fd >= 0)
+    FdInotifyCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, inotify_init1, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(inotify_init1, flags);
+  int fd = REAL(inotify_init1)(flags);
+  if (fd >= 0)
+    FdInotifyCreate(thr, pc, fd);
+  return fd;
+}
+
 TSAN_INTERCEPTOR(int, socket, int domain, int type, int protocol) {
   SCOPED_TSAN_INTERCEPTOR(socket, domain, type, protocol);
   int fd = REAL(socket)(domain, type, protocol);
@@ -1324,18 +1283,43 @@
   return fd;
 }
 
+TSAN_INTERCEPTOR(int, socketpair, int domain, int type, int protocol, int *fd) {
+  SCOPED_TSAN_INTERCEPTOR(socketpair, domain, type, protocol, fd);
+  int res = REAL(socketpair)(domain, type, protocol, fd);
+  if (res == 0 && fd[0] >= 0 && fd[1] >= 0)
+    FdPipeCreate(thr, pc, fd[0], fd[1]);
+  return res;
+}
+
 TSAN_INTERCEPTOR(int, connect, int fd, void *addr, unsigned addrlen) {
   SCOPED_TSAN_INTERCEPTOR(connect, fd, addr, addrlen);
+  FdSocketConnecting(thr, pc, fd);
   int res = REAL(connect)(fd, addr, addrlen);
-  if (res == 0)
+  if (res == 0 && fd >= 0)
     FdSocketConnect(thr, pc, fd);
   return res;
 }
 
+TSAN_INTERCEPTOR(int, bind, int fd, void *addr, unsigned addrlen) {
+  SCOPED_TSAN_INTERCEPTOR(bind, fd, addr, addrlen);
+  int res = REAL(bind)(fd, addr, addrlen);
+  if (fd >= 0 && res == 0)  // fd 0 is valid; record only successful calls.
+    FdAccess(thr, pc, fd);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, listen, int fd, int backlog) {
+  SCOPED_TSAN_INTERCEPTOR(listen, fd, backlog);
+  int res = REAL(listen)(fd, backlog);
+  if (fd >= 0 && res == 0)  // fd 0 is valid; record only successful calls.
+    FdAccess(thr, pc, fd);
+  return res;
+}
+
 TSAN_INTERCEPTOR(int, accept, int fd, void *addr, unsigned *addrlen) {
   SCOPED_TSAN_INTERCEPTOR(accept, fd, addr, addrlen);
   int fd2 = REAL(accept)(fd, addr, addrlen);
-  if (fd2 >= 0)
+  if (fd >= 0 && fd2 >= 0)
     FdSocketAccept(thr, pc, fd, fd2);
   return fd2;
 }
@@ -1343,7 +1327,7 @@
 TSAN_INTERCEPTOR(int, accept4, int fd, void *addr, unsigned *addrlen, int f) {
   SCOPED_TSAN_INTERCEPTOR(accept4, fd, addr, addrlen, f);
   int fd2 = REAL(accept4)(fd, addr, addrlen, f);
-  if (fd2 >= 0)
+  if (fd >= 0 && fd2 >= 0)
     FdSocketAccept(thr, pc, fd, fd2);
   return fd2;
 }
@@ -1366,14 +1350,34 @@
 
 TSAN_INTERCEPTOR(int, close, int fd) {
   SCOPED_TSAN_INTERCEPTOR(close, fd);
-  FdClose(thr, pc, fd);
+  if (fd >= 0)
+    FdClose(thr, pc, fd);
   return REAL(close)(fd);
 }
 
+TSAN_INTERCEPTOR(int, __close, int fd) {
+  SCOPED_TSAN_INTERCEPTOR(__close, fd);
+  if (fd >= 0)
+    FdClose(thr, pc, fd);
+  return REAL(__close)(fd);
+}
+
+// glibc guts
+TSAN_INTERCEPTOR(void, __res_iclose, void *state, bool free_addr) {
+  SCOPED_TSAN_INTERCEPTOR(__res_iclose, state, free_addr);
+  int fds[64];
+  int cnt = ExtractResolvFDs(state, fds, ARRAY_SIZE(fds));
+  for (int i = 0; i < cnt; i++) {
+    if (fds[i] > 0)
+      FdClose(thr, pc, fds[i]);
+  }
+  REAL(__res_iclose)(state, free_addr);
+}
+
 TSAN_INTERCEPTOR(int, pipe, int *pipefd) {
   SCOPED_TSAN_INTERCEPTOR(pipe, pipefd);
   int res = REAL(pipe)(pipefd);
-  if (res == 0)
+  if (res == 0 && pipefd[0] >= 0 && pipefd[1] >= 0)
     FdPipeCreate(thr, pc, pipefd[0], pipefd[1]);
   return res;
 }
@@ -1381,42 +1385,15 @@
 TSAN_INTERCEPTOR(int, pipe2, int *pipefd, int flags) {
   SCOPED_TSAN_INTERCEPTOR(pipe2, pipefd, flags);
   int res = REAL(pipe2)(pipefd, flags);
-  if (res == 0)
+  if (res == 0 && pipefd[0] >= 0 && pipefd[1] >= 0)
     FdPipeCreate(thr, pc, pipefd[0], pipefd[1]);
   return res;
 }
 
-TSAN_INTERCEPTOR(long_t, read, int fd, void *buf, long_t sz) {
-  SCOPED_TSAN_INTERCEPTOR(read, fd, buf, sz);
-  int res = REAL(read)(fd, buf, sz);
-  if (res >= 0) {
-    FdAcquire(thr, pc, fd);
-  }
-  return res;
-}
-
-TSAN_INTERCEPTOR(long_t, pread, int fd, void *buf, long_t sz, unsigned off) {
-  SCOPED_TSAN_INTERCEPTOR(pread, fd, buf, sz, off);
-  int res = REAL(pread)(fd, buf, sz, off);
-  if (res >= 0) {
-    FdAcquire(thr, pc, fd);
-  }
-  return res;
-}
-
-TSAN_INTERCEPTOR(long_t, pread64, int fd, void *buf, long_t sz, u64 off) {
-  SCOPED_TSAN_INTERCEPTOR(pread64, fd, buf, sz, off);
-  int res = REAL(pread64)(fd, buf, sz, off);
-  if (res >= 0) {
-    FdAcquire(thr, pc, fd);
-  }
-  return res;
-}
-
 TSAN_INTERCEPTOR(long_t, readv, int fd, void *vec, int cnt) {
   SCOPED_TSAN_INTERCEPTOR(readv, fd, vec, cnt);
   int res = REAL(readv)(fd, vec, cnt);
-  if (res >= 0) {
+  if (res >= 0 && fd >= 0) {
     FdAcquire(thr, pc, fd);
   }
   return res;
@@ -1425,57 +1402,40 @@
 TSAN_INTERCEPTOR(long_t, preadv64, int fd, void *vec, int cnt, u64 off) {
   SCOPED_TSAN_INTERCEPTOR(preadv64, fd, vec, cnt, off);
   int res = REAL(preadv64)(fd, vec, cnt, off);
-  if (res >= 0) {
+  if (res >= 0 && fd >= 0) {
     FdAcquire(thr, pc, fd);
   }
   return res;
 }
 
-TSAN_INTERCEPTOR(long_t, write, int fd, void *buf, long_t sz) {
-  SCOPED_TSAN_INTERCEPTOR(write, fd, buf, sz);
-  FdRelease(thr, pc, fd);
-  int res = REAL(write)(fd, buf, sz);
-  return res;
-}
-
-TSAN_INTERCEPTOR(long_t, pwrite, int fd, void *buf, long_t sz, unsigned off) {
-  SCOPED_TSAN_INTERCEPTOR(pwrite, fd, buf, sz, off);
-  FdRelease(thr, pc, fd);
-  int res = REAL(pwrite)(fd, buf, sz, off);
-  return res;
-}
-
-TSAN_INTERCEPTOR(long_t, pwrite64, int fd, void *buf, long_t sz, u64 off) {
-  SCOPED_TSAN_INTERCEPTOR(pwrite64, fd, buf, sz, off);
-  FdRelease(thr, pc, fd);
-  int res = REAL(pwrite64)(fd, buf, sz, off);
-  return res;
-}
-
 TSAN_INTERCEPTOR(long_t, writev, int fd, void *vec, int cnt) {
   SCOPED_TSAN_INTERCEPTOR(writev, fd, vec, cnt);
-  FdRelease(thr, pc, fd);
+  if (fd >= 0)
+    FdRelease(thr, pc, fd);
   int res = REAL(writev)(fd, vec, cnt);
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, pwritev64, int fd, void *vec, int cnt, u64 off) {
   SCOPED_TSAN_INTERCEPTOR(pwritev64, fd, vec, cnt, off);
-  FdRelease(thr, pc, fd);
+  if (fd >= 0)
+    FdRelease(thr, pc, fd);
   int res = REAL(pwritev64)(fd, vec, cnt, off);
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, send, int fd, void *buf, long_t len, int flags) {
   SCOPED_TSAN_INTERCEPTOR(send, fd, buf, len, flags);
-  FdRelease(thr, pc, fd);
+  if (fd >= 0)
+    FdRelease(thr, pc, fd);
   int res = REAL(send)(fd, buf, len, flags);
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, sendmsg, int fd, void *msg, int flags) {
   SCOPED_TSAN_INTERCEPTOR(sendmsg, fd, msg, flags);
-  FdRelease(thr, pc, fd);
+  if (fd >= 0)
+    FdRelease(thr, pc, fd);
   int res = REAL(sendmsg)(fd, msg, flags);
   return res;
 }
@@ -1483,7 +1443,7 @@
 TSAN_INTERCEPTOR(long_t, recv, int fd, void *buf, long_t len, int flags) {
   SCOPED_TSAN_INTERCEPTOR(recv, fd, buf, len, flags);
   int res = REAL(recv)(fd, buf, len, flags);
-  if (res >= 0) {
+  if (res >= 0 && fd >= 0) {
     FdAcquire(thr, pc, fd);
   }
   return res;
@@ -1492,7 +1452,7 @@
 TSAN_INTERCEPTOR(long_t, recvmsg, int fd, void *msg, int flags) {
   SCOPED_TSAN_INTERCEPTOR(recvmsg, fd, msg, flags);
   int res = REAL(recvmsg)(fd, msg, flags);
-  if (res >= 0) {
+  if (res >= 0 && fd >= 0) {
     FdAcquire(thr, pc, fd);
   }
   return res;
@@ -1500,7 +1460,7 @@
 
 TSAN_INTERCEPTOR(int, unlink, char *path) {
   SCOPED_TSAN_INTERCEPTOR(unlink, path);
-  Release(thr, pc, file2addr(path));
+  Release(thr, pc, File2addr(path));
   int res = REAL(unlink)(path);
   return res;
 }
@@ -1508,19 +1468,57 @@
 TSAN_INTERCEPTOR(void*, fopen, char *path, char *mode) {
   SCOPED_TSAN_INTERCEPTOR(fopen, path, mode);
   void *res = REAL(fopen)(path, mode);
-  Acquire(thr, pc, file2addr(path));
+  Acquire(thr, pc, File2addr(path));
+  if (res) {
+    int fd = fileno_unlocked(res);
+    if (fd >= 0)
+      FdFileCreate(thr, pc, fd);
+  }
   return res;
 }
 
+TSAN_INTERCEPTOR(void*, freopen, char *path, char *mode, void *stream) {
+  SCOPED_TSAN_INTERCEPTOR(freopen, path, mode, stream);
+  if (stream) {
+    int fd = fileno_unlocked(stream);
+    if (fd >= 0)
+      FdClose(thr, pc, fd);
+  }
+  void *res = REAL(freopen)(path, mode, stream);
+  Acquire(thr, pc, File2addr(path));
+  if (res) {
+    int fd = fileno_unlocked(res);
+    if (fd >= 0)
+      FdFileCreate(thr, pc, fd);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, fclose, void *stream) {
+  {
+    SCOPED_TSAN_INTERCEPTOR(fclose, stream);
+    if (stream) {
+      int fd = fileno_unlocked(stream);
+      if (fd >= 0)
+        FdClose(thr, pc, fd);
+    }
+  }
+  return REAL(fclose)(stream);
+}
+
 TSAN_INTERCEPTOR(uptr, fread, void *ptr, uptr size, uptr nmemb, void *f) {
-  SCOPED_TSAN_INTERCEPTOR(fread, ptr, size, nmemb, f);
-  MemoryAccessRange(thr, pc, (uptr)ptr, size * nmemb, true);
+  {
+    SCOPED_TSAN_INTERCEPTOR(fread, ptr, size, nmemb, f);
+    MemoryAccessRange(thr, pc, (uptr)ptr, size * nmemb, true);
+  }
   return REAL(fread)(ptr, size, nmemb, f);
 }
 
 TSAN_INTERCEPTOR(uptr, fwrite, const void *p, uptr size, uptr nmemb, void *f) {
-  SCOPED_TSAN_INTERCEPTOR(fwrite, p, size, nmemb, f);
-  MemoryAccessRange(thr, pc, (uptr)p, size * nmemb, false);
+  {
+    SCOPED_TSAN_INTERCEPTOR(fwrite, p, size, nmemb, f);
+    MemoryAccessRange(thr, pc, (uptr)p, size * nmemb, false);
+  }
   return REAL(fwrite)(p, size, nmemb, f);
 }
 
@@ -1532,7 +1530,7 @@
 
 TSAN_INTERCEPTOR(int, rmdir, char *path) {
   SCOPED_TSAN_INTERCEPTOR(rmdir, path);
-  Release(thr, pc, dir2addr(path));
+  Release(thr, pc, Dir2addr(path));
   int res = REAL(rmdir)(path);
   return res;
 }
@@ -1540,23 +1538,26 @@
 TSAN_INTERCEPTOR(void*, opendir, char *path) {
   SCOPED_TSAN_INTERCEPTOR(opendir, path);
   void *res = REAL(opendir)(path);
-  Acquire(thr, pc, dir2addr(path));
+  if (res != 0)
+    Acquire(thr, pc, Dir2addr(path));
   return res;
 }
 
 TSAN_INTERCEPTOR(int, epoll_ctl, int epfd, int op, int fd, void *ev) {
   SCOPED_TSAN_INTERCEPTOR(epoll_ctl, epfd, op, fd, ev);
-  if (op == EPOLL_CTL_ADD) {
+  if (op == EPOLL_CTL_ADD && epfd >= 0) {
     FdRelease(thr, pc, epfd);
   }
   int res = REAL(epoll_ctl)(epfd, op, fd, ev);
+  if (fd >= 0)
+    FdAccess(thr, pc, fd);
   return res;
 }
 
 TSAN_INTERCEPTOR(int, epoll_wait, int epfd, void *ev, int cnt, int timeout) {
   SCOPED_TSAN_INTERCEPTOR(epoll_wait, epfd, ev, cnt, timeout);
   int res = BLOCK_REAL(epoll_wait)(epfd, ev, cnt, timeout);
-  if (res > 0) {
+  if (res > 0 && epfd >= 0) {
     FdAcquire(thr, pc, epfd);
   }
   return res;
@@ -1729,6 +1730,46 @@
   return 0;
 }
 
+TSAN_INTERCEPTOR(int, fork, int fake) {
+  SCOPED_TSAN_INTERCEPTOR(fork, fake);
+  // Intercepted to process pending signals and to call FdOnFork in the child.
+  int pid = REAL(fork)(fake);
+  if (pid == 0) {
+    // child
+    FdOnFork(thr, pc);
+  } else if (pid > 0) {
+    // parent: nothing to do.
+  }
+  return pid;
+}
+
+struct TsanInterceptorContext {
+  ThreadState *thr;
+  const uptr caller_pc;
+  const uptr pc;
+};
+
+#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
+    MemoryAccessRange(((TsanInterceptorContext*)ctx)->thr,  \
+                      ((TsanInterceptorContext*)ctx)->pc,   \
+                      (uptr)ptr, size, true)
+#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size)       \
+    MemoryAccessRange(((TsanInterceptorContext*)ctx)->thr,  \
+                      ((TsanInterceptorContext*)ctx)->pc,   \
+                      (uptr)ptr, size, false)
+#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
+    SCOPED_TSAN_INTERCEPTOR(func, __VA_ARGS__) \
+    TsanInterceptorContext _ctx = {thr, caller_pc, pc}; \
+    ctx = (void*)&_ctx; \
+    (void)ctx;
+#define COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd) \
+    FdAcquire(((TsanInterceptorContext*)ctx)->thr, pc, fd)
+#define COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd) \
+    FdRelease(((TsanInterceptorContext*)ctx)->thr, pc, fd)
+#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
+    ThreadSetName(((TsanInterceptorContext*)ctx)->thr, name)
+#include "sanitizer_common/sanitizer_common_interceptors.inc"
+
 namespace __tsan {
 
 void ProcessPendingSignals(ThreadState *thr) {
@@ -1763,6 +1804,7 @@
               (uptr)sigactions[sig].sa_sigaction :
               (uptr)sigactions[sig].sa_handler;
           stack.Init(&pc, 1);
+          Lock l(&ctx->thread_mtx);
           ScopedReport rep(ReportTypeErrnoInSignal);
           if (!IsFiredSuppression(ctx, rep, stack)) {
             rep.AddStack(&stack);
@@ -1791,6 +1833,8 @@
   REAL(memcpy) = internal_memcpy;
   REAL(memcmp) = internal_memcmp;
 
+  SANITIZER_COMMON_INTERCEPTORS_INIT;
+
   TSAN_INTERCEPT(longjmp);
   TSAN_INTERCEPT(siglongjmp);
 
@@ -1851,7 +1895,7 @@
   TSAN_INTERCEPT(pthread_rwlock_timedwrlock);
   TSAN_INTERCEPT(pthread_rwlock_unlock);
 
-  TSAN_INTERCEPT(pthread_cond_init);
+  // TSAN_INTERCEPT(pthread_cond_init);
   TSAN_INTERCEPT(pthread_cond_destroy);
   TSAN_INTERCEPT(pthread_cond_signal);
   TSAN_INTERCEPT(pthread_cond_broadcast);
@@ -1872,30 +1916,46 @@
   TSAN_INTERCEPT(sem_post);
   TSAN_INTERCEPT(sem_getvalue);
 
+  TSAN_INTERCEPT(stat);
+  TSAN_INTERCEPT(__xstat);
+  TSAN_INTERCEPT(stat64);
+  TSAN_INTERCEPT(__xstat64);
+  TSAN_INTERCEPT(lstat);
+  TSAN_INTERCEPT(__lxstat);
+  TSAN_INTERCEPT(lstat64);
+  TSAN_INTERCEPT(__lxstat64);
+  TSAN_INTERCEPT(fstat);
+  TSAN_INTERCEPT(__fxstat);
+  TSAN_INTERCEPT(fstat64);
+  TSAN_INTERCEPT(__fxstat64);
   TSAN_INTERCEPT(open);
+  TSAN_INTERCEPT(open64);
   TSAN_INTERCEPT(creat);
+  TSAN_INTERCEPT(creat64);
   TSAN_INTERCEPT(dup);
   TSAN_INTERCEPT(dup2);
   TSAN_INTERCEPT(dup3);
   TSAN_INTERCEPT(eventfd);
+  TSAN_INTERCEPT(signalfd);
+  TSAN_INTERCEPT(inotify_init);
+  TSAN_INTERCEPT(inotify_init1);
   TSAN_INTERCEPT(socket);
+  TSAN_INTERCEPT(socketpair);
   TSAN_INTERCEPT(connect);
+  TSAN_INTERCEPT(bind);
+  TSAN_INTERCEPT(listen);
   TSAN_INTERCEPT(accept);
   TSAN_INTERCEPT(accept4);
   TSAN_INTERCEPT(epoll_create);
   TSAN_INTERCEPT(epoll_create1);
   TSAN_INTERCEPT(close);
+  TSAN_INTERCEPT(__close);
+  TSAN_INTERCEPT(__res_iclose);
   TSAN_INTERCEPT(pipe);
   TSAN_INTERCEPT(pipe2);
 
-  TSAN_INTERCEPT(read);
-  TSAN_INTERCEPT(pread);
-  TSAN_INTERCEPT(pread64);
   TSAN_INTERCEPT(readv);
   TSAN_INTERCEPT(preadv64);
-  TSAN_INTERCEPT(write);
-  TSAN_INTERCEPT(pwrite);
-  TSAN_INTERCEPT(pwrite64);
   TSAN_INTERCEPT(writev);
   TSAN_INTERCEPT(pwritev64);
   TSAN_INTERCEPT(send);
@@ -1905,6 +1965,8 @@
 
   TSAN_INTERCEPT(unlink);
   TSAN_INTERCEPT(fopen);
+  TSAN_INTERCEPT(freopen);
+  TSAN_INTERCEPT(fclose);
   TSAN_INTERCEPT(fread);
   TSAN_INTERCEPT(fwrite);
   TSAN_INTERCEPT(puts);
@@ -1930,13 +1992,17 @@
   TSAN_INTERCEPT(mlockall);
   TSAN_INTERCEPT(munlockall);
 
+  TSAN_INTERCEPT(fork);
+  TSAN_INTERCEPT(on_exit);
+  TSAN_INTERCEPT(__cxa_atexit);
+
   // Need to setup it, because interceptors check that the function is resolved.
   // But atexit is emitted directly into the module, so can't be resolved.
   REAL(atexit) = (int(*)(void(*)()))unreachable;
   atexit_ctx = new(internal_alloc(MBlockAtExit, sizeof(AtExitContext)))
       AtExitContext();
 
-  if (__cxa_atexit(&finalize, 0, 0)) {
+  if (REAL(__cxa_atexit)(&finalize, 0, 0)) {
     Printf("ThreadSanitizer: failed to setup atexit callback\n");
     Die();
   }
diff --git a/lib/tsan/rtl/tsan_interface.cc b/lib/tsan/rtl/tsan_interface.cc
index 6d09546..dd06bbe 100644
--- a/lib/tsan/rtl/tsan_interface.cc
+++ b/lib/tsan/rtl/tsan_interface.cc
@@ -24,13 +24,13 @@
 }
 
 void __tsan_read16(void *addr) {
-  MemoryRead8Byte(cur_thread(), CALLERPC, (uptr)addr);
-  MemoryRead8Byte(cur_thread(), CALLERPC, (uptr)addr + 8);
+  MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
+  MemoryRead(cur_thread(), CALLERPC, (uptr)addr + 8, kSizeLog8);
 }
 
 void __tsan_write16(void *addr) {
-  MemoryWrite8Byte(cur_thread(), CALLERPC, (uptr)addr);
-  MemoryWrite8Byte(cur_thread(), CALLERPC, (uptr)addr + 8);
+  MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
+  MemoryWrite(cur_thread(), CALLERPC, (uptr)addr + 8, kSizeLog8);
 }
 
 void __tsan_acquire(void *addr) {
diff --git a/lib/tsan/rtl/tsan_interface.h b/lib/tsan/rtl/tsan_interface.h
index 7480fc8..28eea14 100644
--- a/lib/tsan/rtl/tsan_interface.h
+++ b/lib/tsan/rtl/tsan_interface.h
@@ -16,7 +16,7 @@
 #ifndef TSAN_INTERFACE_H
 #define TSAN_INTERFACE_H
 
-#include <sanitizer/common_interface_defs.h>
+#include <sanitizer_common/sanitizer_internal_defs.h>
 
 // This header should NOT include any other headers.
 // All functions in this header are extern "C" and start with __tsan_.
diff --git a/lib/tsan/rtl/tsan_interface_ann.h b/lib/tsan/rtl/tsan_interface_ann.h
index ed80907..8e45328 100644
--- a/lib/tsan/rtl/tsan_interface_ann.h
+++ b/lib/tsan/rtl/tsan_interface_ann.h
@@ -14,7 +14,7 @@
 #ifndef TSAN_INTERFACE_ANN_H
 #define TSAN_INTERFACE_ANN_H
 
-#include <sanitizer/common_interface_defs.h>
+#include <sanitizer_common/sanitizer_internal_defs.h>
 
 // This header should NOT include any other headers.
 // All functions in this header are extern "C" and start with __tsan_.
diff --git a/lib/tsan/rtl/tsan_interface_atomic.cc b/lib/tsan/rtl/tsan_interface_atomic.cc
index a9d75e5..a2f7ff4 100644
--- a/lib/tsan/rtl/tsan_interface_atomic.cc
+++ b/lib/tsan/rtl/tsan_interface_atomic.cc
@@ -20,25 +20,42 @@
 // http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
 
 #include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
 #include "tsan_interface_atomic.h"
 #include "tsan_flags.h"
 #include "tsan_rtl.h"
 
 using namespace __tsan;  // NOLINT
 
+#define SCOPED_ATOMIC(func, ...) \
+    const uptr callpc = (uptr)__builtin_return_address(0); \
+    uptr pc = __sanitizer::StackTrace::GetCurrentPc(); \
+    pc = __sanitizer::StackTrace::GetPreviousInstructionPc(pc); \
+    mo = ConvertOrder(mo); \
+    mo = flags()->force_seq_cst_atomics ? (morder)mo_seq_cst : mo; \
+    ThreadState *const thr = cur_thread(); \
+    AtomicStatInc(thr, sizeof(*a), mo, StatAtomic##func); \
+    ScopedAtomic sa(thr, callpc, __FUNCTION__); \
+    return Atomic##func(thr, pc, __VA_ARGS__); \
+/**/
+
 class ScopedAtomic {
  public:
   ScopedAtomic(ThreadState *thr, uptr pc, const char *func)
       : thr_(thr) {
-    CHECK_EQ(thr_->in_rtl, 1);  // 1 due to our own ScopedInRtl member.
+    CHECK_EQ(thr_->in_rtl, 0);
+    ProcessPendingSignals(thr);
+    FuncEntry(thr_, pc);
     DPrintf("#%d: %s\n", thr_->tid, func);
+    thr_->in_rtl++;
   }
   ~ScopedAtomic() {
-    CHECK_EQ(thr_->in_rtl, 1);
+    thr_->in_rtl--;
+    CHECK_EQ(thr_->in_rtl, 0);
+    FuncExit(thr_);
   }
  private:
   ThreadState *thr_;
-  ScopedInRtl in_rtl_;
 };
 
 // Some shortcuts.
@@ -212,16 +229,19 @@
 }
 #endif
 
-#define SCOPED_ATOMIC(func, ...) \
-    mo = ConvertOrder(mo); \
-    mo = flags()->force_seq_cst_atomics ? (morder)mo_seq_cst : mo; \
-    ThreadState *const thr = cur_thread(); \
-    ProcessPendingSignals(thr); \
-    const uptr pc = (uptr)__builtin_return_address(0); \
-    AtomicStatInc(thr, sizeof(*a), mo, StatAtomic##func); \
-    ScopedAtomic sa(thr, pc, __FUNCTION__); \
-    return Atomic##func(thr, pc, __VA_ARGS__); \
-/**/
+template<typename T>
+static int SizeLog() {
+  if (sizeof(T) <= 1)
+    return kSizeLog1;
+  else if (sizeof(T) <= 2)
+    return kSizeLog2;
+  else if (sizeof(T) <= 4)
+    return kSizeLog4;
+  else
+    return kSizeLog8;
+  // For 16-byte atomics we also use 8-byte memory access,
+  // this leads to false negatives only in very obscure cases.
+}
 
 template<typename T>
 static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a,
@@ -229,14 +249,17 @@
   CHECK(IsLoadOrder(mo));
   // This fast-path is critical for performance.
   // Assume the access is atomic.
-  if (!IsAcquireOrder(mo) && sizeof(T) <= sizeof(a))
+  if (!IsAcquireOrder(mo) && sizeof(T) <= sizeof(a)) {
+    MemoryReadAtomic(thr, pc, (uptr)a, SizeLog<T>());
     return *a;
+  }
   SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, false);
   thr->clock.set(thr->tid, thr->fast_state.epoch());
   thr->clock.acquire(&s->clock);
   T v = *a;
   s->mtx.ReadUnlock();
   __sync_synchronize();
+  MemoryReadAtomic(thr, pc, (uptr)a, SizeLog<T>());
   return v;
 }
 
@@ -244,6 +267,7 @@
 static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v,
     morder mo) {
   CHECK(IsStoreOrder(mo));
+  MemoryWriteAtomic(thr, pc, (uptr)a, SizeLog<T>());
   // This fast-path is critical for performance.
   // Assume the access is atomic.
   // Strictly saying even relaxed store cuts off release sequence,
@@ -265,6 +289,7 @@
 
 template<typename T, T (*F)(volatile T *v, T op)>
 static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
+  MemoryWriteAtomic(thr, pc, (uptr)a, SizeLog<T>());
   SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, true);
   thr->clock.set(thr->tid, thr->fast_state.epoch());
   if (IsAcqRelOrder(mo))
@@ -324,6 +349,7 @@
 static bool AtomicCAS(ThreadState *thr, uptr pc,
     volatile T *a, T *c, T v, morder mo, morder fmo) {
   (void)fmo;  // Unused because llvm does not pass it yet.
+  MemoryWriteAtomic(thr, pc, (uptr)a, SizeLog<T>());
   SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, true);
   thr->clock.set(thr->tid, thr->fast_state.epoch());
   if (IsAcqRelOrder(mo))
diff --git a/lib/tsan/rtl/tsan_interface_atomic.h b/lib/tsan/rtl/tsan_interface_atomic.h
index c304fcc..5352d56 100644
--- a/lib/tsan/rtl/tsan_interface_atomic.h
+++ b/lib/tsan/rtl/tsan_interface_atomic.h
@@ -28,7 +28,7 @@
 
 #if defined(__SIZEOF_INT128__) \
     || (__clang_major__ * 100 + __clang_minor__ >= 302)
-typedef __int128 __tsan_atomic128;
+__extension__ typedef __int128 __tsan_atomic128;
 #define __TSAN_HAS_INT128 1
 #else
 typedef char     __tsan_atomic128;
diff --git a/lib/tsan/rtl/tsan_interface_inl.h b/lib/tsan/rtl/tsan_interface_inl.h
index 8a92155..29e2b21 100644
--- a/lib/tsan/rtl/tsan_interface_inl.h
+++ b/lib/tsan/rtl/tsan_interface_inl.h
@@ -19,41 +19,41 @@
 using namespace __tsan;  // NOLINT
 
 void __tsan_read1(void *addr) {
-  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 0, 0);
+  MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog1);
 }
 
 void __tsan_read2(void *addr) {
-  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, 0);
+  MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog2);
 }
 
 void __tsan_read4(void *addr) {
-  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, 0);
+  MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog4);
 }
 
 void __tsan_read8(void *addr) {
-  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 3, 0);
+  MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
 }
 
 void __tsan_write1(void *addr) {
-  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 0, 1);
+  MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog1);
 }
 
 void __tsan_write2(void *addr) {
-  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, 1);
+  MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog2);
 }
 
 void __tsan_write4(void *addr) {
-  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, 1);
+  MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog4);
 }
 
 void __tsan_write8(void *addr) {
-  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 3, 1);
+  MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
 }
 
 void __tsan_vptr_update(void **vptr_p, void *new_val) {
   CHECK_EQ(sizeof(vptr_p), 8);
   if (*vptr_p != new_val)
-    MemoryAccess(cur_thread(), CALLERPC, (uptr)vptr_p, 3, 1);
+    MemoryWrite(cur_thread(), CALLERPC, (uptr)vptr_p, kSizeLog8);
 }
 
 void __tsan_func_entry(void *pc) {
diff --git a/lib/tsan/rtl/tsan_interface_java.cc b/lib/tsan/rtl/tsan_interface_java.cc
new file mode 100644
index 0000000..ee12001
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interface_java.cc
@@ -0,0 +1,305 @@
+//===-- tsan_interface_java.cc --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_interface_java.h"
+#include "tsan_rtl.h"
+#include "tsan_mutex.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+
+using namespace __tsan;  // NOLINT
+
+namespace __tsan {
+
+const uptr kHeapShadow = 0x300000000000ull;
+const uptr kHeapAlignment = 8;
+
+struct BlockDesc {
+  bool begin;  // Set by the constructor, cleared by the destructor.
+  Mutex mtx;
+  SyncVar *head;  // Head of the SyncVar list, linked through SyncVar::next.
+
+  BlockDesc()
+      : mtx(MutexTypeJavaMBlock, StatMtxJavaMBlock)
+      , head() {
+    CHECK_EQ(begin, false);  // Not initialized above: storage must hold 0.
+    begin = true;
+  }
+
+  ~BlockDesc() {
+    CHECK_EQ(begin, true);
+    begin = false;
+    ThreadState *thr = cur_thread();
+    SyncVar *s = head;
+    while (s) {
+      SyncVar *s1 = s->next;  // Save the link before s is destroyed.
+      StatInc(thr, StatSyncDestroyed);
+      s->mtx.Lock();  // NOTE(review): lock/unlock pair presumably waits out
+      s->mtx.Unlock();  // any current holder before destruction -- confirm.
+      thr->mset.Remove(s->GetId());
+      DestroyAndFree(s);
+      s = s1;
+    }
+  }
+};
+
+struct JavaContext {
+  const uptr heap_begin;
+  const uptr heap_size;
+  BlockDesc *heap_shadow;
+
+  JavaContext(jptr heap_begin, jptr heap_size)
+      : heap_begin(heap_begin)
+      , heap_size(heap_size) {
+    uptr size = heap_size / kHeapAlignment * sizeof(BlockDesc);
+    heap_shadow = (BlockDesc*)MmapFixedNoReserve(kHeapShadow, size);
+    if ((uptr)heap_shadow != kHeapShadow) {
+      Printf("ThreadSanitizer: failed to mmap Java heap shadow\n");
+      Die();
+    }
+  }
+};
+
+class ScopedJavaFunc {
+ public:
+  ScopedJavaFunc(ThreadState *thr, uptr pc)
+      : thr_(thr) {
+    Initialize(thr_);
+    FuncEntry(thr, pc);
+    CHECK_EQ(thr_->in_rtl, 0);
+    thr_->in_rtl++;
+  }
+
+  ~ScopedJavaFunc() {
+    thr_->in_rtl--;
+    CHECK_EQ(thr_->in_rtl, 0);
+    FuncExit(thr_);
+    // FIXME(dvyukov): process pending signals.
+  }
+
+ private:
+  ThreadState *thr_;
+};
+
+static u64 jctx_buf[sizeof(JavaContext) / sizeof(u64) + 1];
+static JavaContext *jctx;
+
+static BlockDesc *getblock(uptr addr) {
+  uptr i = (addr - jctx->heap_begin) / kHeapAlignment;
+  return &jctx->heap_shadow[i];
+}
+
+static uptr USED getmem(BlockDesc *b) {
+  uptr i = b - jctx->heap_shadow;
+  uptr p = jctx->heap_begin + i * kHeapAlignment;
+  CHECK_GE(p, jctx->heap_begin);
+  CHECK_LT(p, jctx->heap_begin + jctx->heap_size);
+  return p;
+}
+
+static BlockDesc *getblockbegin(uptr addr) {
+  for (BlockDesc *b = getblock(addr);; b--) {  // Scan toward lower slots.
+    CHECK_GE(b, jctx->heap_shadow);
+    if (b->begin)
+      return b;
+  }
+  return 0;  // Not reached: the loop exits only via return or a failed CHECK.
+}
+
+SyncVar* GetJavaSync(ThreadState *thr, uptr pc, uptr addr,
+                     bool write_lock, bool create) {
+  if (jctx == 0 || addr < jctx->heap_begin
+      || addr >= jctx->heap_begin + jctx->heap_size)
+    return 0;
+  BlockDesc *b = getblockbegin(addr);
+  DPrintf("#%d: GetJavaSync %p->%p\n", thr->tid, addr, b);
+  Lock l(&b->mtx);
+  SyncVar *s = b->head;
+  for (; s; s = s->next) {
+    if (s->addr == addr) {
+      DPrintf("#%d: found existing sync for %p\n", thr->tid, addr);
+      break;
+    }
+  }
+  if (s == 0 && create) {
+    DPrintf("#%d: creating new sync for %p\n", thr->tid, addr);
+    s = CTX()->synctab.Create(thr, pc, addr);
+    s->next = b->head;
+    b->head = s;
+  }
+  if (s) {
+    if (write_lock)
+      s->mtx.Lock();
+    else
+      s->mtx.ReadLock();
+  }
+  return s;
+}
+
+SyncVar* GetAndRemoveJavaSync(ThreadState *thr, uptr pc, uptr addr) {
+  // We do not destroy Java mutexes other than in __tsan_java_free().
+  return 0;
+}
+
+}  // namespace __tsan
+
+#define SCOPED_JAVA_FUNC(func) \
+  ThreadState *thr = cur_thread(); \
+  const uptr caller_pc = GET_CALLER_PC(); \
+  const uptr pc = (uptr)&func; \
+  (void)pc; \
+  ScopedJavaFunc scoped(thr, caller_pc); \
+/**/
+
+void __tsan_java_init(jptr heap_begin, jptr heap_size) {
+  SCOPED_JAVA_FUNC(__tsan_java_init);
+  DPrintf("#%d: java_init(%p, %p)\n", thr->tid, heap_begin, heap_size);
+  CHECK_EQ(jctx, 0);
+  CHECK_GT(heap_begin, 0);
+  CHECK_GT(heap_size, 0);
+  CHECK_EQ(heap_begin % kHeapAlignment, 0);
+  CHECK_EQ(heap_size % kHeapAlignment, 0);
+  CHECK_LT(heap_begin, heap_begin + heap_size);
+  jctx = new(jctx_buf) JavaContext(heap_begin, heap_size);
+}
+
+int  __tsan_java_fini() {
+  SCOPED_JAVA_FUNC(__tsan_java_fini);
+  DPrintf("#%d: java_fini()\n", thr->tid);
+  CHECK_NE(jctx, 0);
+  // FIXME(dvyukov): this does not call atexit() callbacks.
+  int status = Finalize(thr);
+  DPrintf("#%d: java_fini() = %d\n", thr->tid, status);
+  return status;
+}
+
+void __tsan_java_alloc(jptr ptr, jptr size) {
+  SCOPED_JAVA_FUNC(__tsan_java_alloc);
+  DPrintf("#%d: java_alloc(%p, %p)\n", thr->tid, ptr, size);
+  CHECK_NE(jctx, 0);
+  CHECK_NE(size, 0);
+  CHECK_EQ(ptr % kHeapAlignment, 0);
+  CHECK_EQ(size % kHeapAlignment, 0);
+  CHECK_GE(ptr, jctx->heap_begin);
+  CHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
+
+  BlockDesc *b = getblock(ptr);
+  new(b) BlockDesc();
+}
+
+void __tsan_java_free(jptr ptr, jptr size) {
+  SCOPED_JAVA_FUNC(__tsan_java_free);
+  DPrintf("#%d: java_free(%p, %p)\n", thr->tid, ptr, size);
+  CHECK_NE(jctx, 0);
+  CHECK_NE(size, 0);
+  CHECK_EQ(ptr % kHeapAlignment, 0);
+  CHECK_EQ(size % kHeapAlignment, 0);
+  CHECK_GE(ptr, jctx->heap_begin);
+  CHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
+
+  BlockDesc *beg = getblock(ptr);
+  BlockDesc *end = getblock(ptr + size);
+  for (BlockDesc *b = beg; b != end; b++) {
+    if (b->begin)
+      b->~BlockDesc();
+  }
+}
+
+void __tsan_java_move(jptr src, jptr dst, jptr size) {
+  SCOPED_JAVA_FUNC(__tsan_java_move);
+  DPrintf("#%d: java_move(%p, %p, %p)\n", thr->tid, src, dst, size);
+  CHECK_NE(jctx, 0);
+  CHECK_NE(size, 0);
+  CHECK_EQ(src % kHeapAlignment, 0);
+  CHECK_EQ(dst % kHeapAlignment, 0);
+  CHECK_EQ(size % kHeapAlignment, 0);
+  CHECK_GE(src, jctx->heap_begin);
+  CHECK_LE(src + size, jctx->heap_begin + jctx->heap_size);
+  CHECK_GE(dst, jctx->heap_begin);
+  CHECK_LE(dst + size, jctx->heap_begin + jctx->heap_size);
+  CHECK(dst >= src + size || src >= dst + size);
+
+  // Assuming it's not running concurrently with threads that do
+  // memory accesses and mutex operations (stop-the-world phase).
+  {  // NOLINT
+    BlockDesc *s = getblock(src);
+    BlockDesc *d = getblock(dst);
+    BlockDesc *send = getblock(src + size);
+    for (; s != send; s++, d++) {
+      CHECK_EQ(d->begin, false);
+      if (s->begin) {
+        DPrintf("#%d: moving block %p->%p\n", thr->tid, getmem(s), getmem(d));
+        new(d) BlockDesc;
+        d->head = s->head;
+        for (SyncVar *sync = d->head; sync; sync = sync->next) {
+          uptr newaddr = sync->addr - src + dst;
+          DPrintf("#%d: moving sync %p->%p\n", thr->tid, sync->addr, newaddr);
+          sync->addr = newaddr;
+        }
+        s->head = 0;
+        s->~BlockDesc();
+      }
+    }
+  }
+
+  {  // NOLINT
+    u64 *s = (u64*)MemToShadow(src);
+    u64 *d = (u64*)MemToShadow(dst);
+    u64 *send = (u64*)MemToShadow(src + size);
+    for (; s != send; s++, d++) {
+      *d = *s;
+      *s = 0;
+    }
+  }
+}
+
+void __tsan_java_mutex_lock(jptr addr) {
+  SCOPED_JAVA_FUNC(__tsan_java_mutex_lock);
+  DPrintf("#%d: java_mutex_lock(%p)\n", thr->tid, addr);
+  CHECK_NE(jctx, 0);
+  CHECK_GE(addr, jctx->heap_begin);
+  CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  MutexLock(thr, pc, addr);
+}
+
+void __tsan_java_mutex_unlock(jptr addr) {
+  SCOPED_JAVA_FUNC(__tsan_java_mutex_unlock);
+  DPrintf("#%d: java_mutex_unlock(%p)\n", thr->tid, addr);
+  CHECK_NE(jctx, 0);
+  CHECK_GE(addr, jctx->heap_begin);
+  CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  MutexUnlock(thr, pc, addr);
+}
+
+void __tsan_java_mutex_read_lock(jptr addr) {
+  SCOPED_JAVA_FUNC(__tsan_java_mutex_read_lock);
+  DPrintf("#%d: java_mutex_read_lock(%p)\n", thr->tid, addr);
+  CHECK_NE(jctx, 0);
+  CHECK_GE(addr, jctx->heap_begin);
+  CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  MutexReadLock(thr, pc, addr);
+}
+
+void __tsan_java_mutex_read_unlock(jptr addr) {
+  SCOPED_JAVA_FUNC(__tsan_java_mutex_read_unlock);
+  DPrintf("#%d: java_mutex_read_unlock(%p)\n", thr->tid, addr);
+  CHECK_NE(jctx, 0);
+  CHECK_GE(addr, jctx->heap_begin);
+  CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  MutexReadUnlock(thr, pc, addr);
+}
diff --git a/lib/tsan/rtl/tsan_interface_java.h b/lib/tsan/rtl/tsan_interface_java.h
new file mode 100644
index 0000000..241483a
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interface_java.h
@@ -0,0 +1,74 @@
+//===-- tsan_interface_java.h -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Interface for verification of Java or mixed Java/C++ programs.
+// The interface is intended to be used from within a JVM to notify TSan
+// about events such as Java locks and GC memory compaction.
+//
+// For plain memory accesses and function entry/exit a JVM is intended to use
+// C++ interfaces: __tsan_readN/writeN and __tsan_func_enter/exit.
+//
+// For volatile memory accesses and atomic operations JVM is intended to use
+// standard atomics API: __tsan_atomicN_load/store/etc.
+//
+// For usage examples see lit_tests/java_*.cc
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_JAVA_H
+#define TSAN_INTERFACE_JAVA_H
+
+#ifndef INTERFACE_ATTRIBUTE
+# define INTERFACE_ATTRIBUTE __attribute__((visibility("default")))
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef unsigned long jptr;  // NOLINT
+
+// Must be called before any other callback from Java.
+void __tsan_java_init(jptr heap_begin, jptr heap_size) INTERFACE_ATTRIBUTE;
+// Must be called when the application exits.
+// Not necessarily the last callback (concurrently running threads are OK).
+// Returns exit status or 0 if tsan does not want to override it.
+int  __tsan_java_fini(void) INTERFACE_ATTRIBUTE;
+
+// Callback for memory allocations.
+// May be omitted for allocations that are not subject to data races
+// nor contain synchronization objects (e.g. String).
+void __tsan_java_alloc(jptr ptr, jptr size) INTERFACE_ATTRIBUTE;
+// Callback for memory free.
+// Can be aggregated for several objects (preferably).
+void __tsan_java_free(jptr ptr, jptr size) INTERFACE_ATTRIBUTE;
+// Callback for memory move by GC.
+// Can be aggregated for several objects (preferably).
+// The ranges must not overlap.
+void __tsan_java_move(jptr src, jptr dst, jptr size) INTERFACE_ATTRIBUTE;
+
+// Mutex lock.
+// Addr is any unique address associated with the mutex.
+// Must not be called on recursive reentry.
+// Object.wait() is handled as a pair of unlock/lock.
+void __tsan_java_mutex_lock(jptr addr) INTERFACE_ATTRIBUTE;
+// Mutex unlock.
+void __tsan_java_mutex_unlock(jptr addr) INTERFACE_ATTRIBUTE;
+// Mutex read lock.
+void __tsan_java_mutex_read_lock(jptr addr) INTERFACE_ATTRIBUTE;
+// Mutex read unlock.
+void __tsan_java_mutex_read_unlock(jptr addr) INTERFACE_ATTRIBUTE;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#undef INTERFACE_ATTRIBUTE
+
+#endif  // #ifndef TSAN_INTERFACE_JAVA_H
diff --git a/lib/tsan/rtl/tsan_md5.cc b/lib/tsan/rtl/tsan_md5.cc
index c9d671f..66e8240 100644
--- a/lib/tsan/rtl/tsan_md5.cc
+++ b/lib/tsan/rtl/tsan_md5.cc
@@ -242,4 +242,4 @@
   MD5_Final((unsigned char*)&res.hash[0], &ctx);
   return res;
 }
-}
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
index b7e3c76..fb32483 100644
--- a/lib/tsan/rtl/tsan_mman.cc
+++ b/lib/tsan/rtl/tsan_mman.cc
@@ -38,8 +38,16 @@
   allocator()->Init();
 }
 
-void AlloctorThreadFinish(ThreadState *thr) {
-  allocator()->SwallowCache(&thr->alloc_cache);
+void AllocatorThreadStart(ThreadState *thr) {
+  allocator()->InitCache(&thr->alloc_cache);
+}
+
+void AllocatorThreadFinish(ThreadState *thr) {
+  allocator()->DestroyCache(&thr->alloc_cache);
+}
+
+void AllocatorPrintStats() {
+  allocator()->PrintStats();
 }
 
 static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
@@ -48,6 +56,7 @@
   Context *ctx = CTX();
   StackTrace stack;
   stack.ObtainCurrent(thr, pc);
+  Lock l(&ctx->thread_mtx);
   ScopedReport rep(ReportTypeSignalUnsafe);
   if (!IsFiredSuppression(ctx, rep, stack)) {
     rep.AddStack(&stack);
@@ -118,10 +127,20 @@
   return p2;
 }
 
+uptr user_alloc_usable_size(ThreadState *thr, uptr pc, void *p) {
+  CHECK_GT(thr->in_rtl, 0);
+  if (!p)
+    return 0;  // A null pointer reports zero usable bytes.
+  const MBlock *meta = (MBlock*)allocator()->GetMetaData(p);
+  return meta ? meta->size : 0;  // Missing metadata also reports zero.
+}
+
 MBlock *user_mblock(ThreadState *thr, void *p) {
-  // CHECK_GT(thr->in_rtl, 0);
   CHECK_NE(p, (void*)0);
-  return (MBlock*)allocator()->GetMetaData(p);
+  Allocator *a = allocator();
+  void *b = a->GetBlockBegin(p);
+  CHECK_NE(b, 0);
+  return (MBlock*)a->GetMetaData(b);
 }
 
 void invoke_malloc_hook(void *ptr, uptr size) {
@@ -161,3 +180,49 @@
 }
 
 }  // namespace __tsan
+
+using namespace __tsan;
+
+extern "C" {
+uptr __tsan_get_current_allocated_bytes() {
+  u64 counters[AllocatorStatCount];
+  allocator()->GetStats(counters);
+  const u64 malloced = counters[AllocatorStatMalloced];
+  const u64 freed = counters[AllocatorStatFreed];
+  return malloced < freed ? 1 : malloced - freed;
+}
+
+uptr __tsan_get_heap_size() {
+  u64 counters[AllocatorStatCount];
+  allocator()->GetStats(counters);
+  const u64 mapped = counters[AllocatorStatMmapped];
+  const u64 unmapped = counters[AllocatorStatUnmapped];
+  return mapped < unmapped ? 1 : mapped - unmapped;
+}
+
+uptr __tsan_get_free_bytes() {
+  return 1;  // Fixed value; nothing is measured here.
+}
+
+uptr __tsan_get_unmapped_bytes() {
+  return 1;  // Fixed value; nothing is measured here.
+}
+
+uptr __tsan_get_estimated_allocated_size(uptr size) {
+  return size;  // The estimate is the requested size itself.
+}
+
+bool __tsan_get_ownership(void *p) {
+  return 0 != allocator()->GetBlockBegin(p);
+}
+
+uptr __tsan_get_allocated_size(void *p) {
+  // Yields 0 for a null p and whenever GetBlockBegin() returns 0.
+  if (!p)
+    return 0;
+  void *const begin = allocator()->GetBlockBegin(p);
+  if (!begin)
+    return 0;
+  return ((MBlock*)allocator()->GetMetaData(begin))->size;
+}
+}  // extern "C"
diff --git a/lib/tsan/rtl/tsan_mman.h b/lib/tsan/rtl/tsan_mman.h
index 06dce8c..4a9240f 100644
--- a/lib/tsan/rtl/tsan_mman.h
+++ b/lib/tsan/rtl/tsan_mman.h
@@ -20,7 +20,9 @@
 const uptr kDefaultAlignment = 16;
 
 void InitializeAllocator();
-void AlloctorThreadFinish(ThreadState *thr);
+void AllocatorThreadStart(ThreadState *thr);
+void AllocatorThreadFinish(ThreadState *thr);
+void AllocatorPrintStats();
 
 // For user allocations.
 void *user_alloc(ThreadState *thr, uptr pc, uptr sz,
@@ -29,6 +31,7 @@
 void user_free(ThreadState *thr, uptr pc, void *p);
 void *user_realloc(ThreadState *thr, uptr pc, void *p, uptr sz);
 void *user_alloc_aligned(ThreadState *thr, uptr pc, uptr sz, uptr align);
+uptr user_alloc_usable_size(ThreadState *thr, uptr pc, void *p);
 // Given the pointer p into a valid allocated block,
 // returns the descriptor of the block.
 MBlock *user_mblock(ThreadState *thr, void *p);
@@ -59,6 +62,7 @@
   MBlockSuppression,
   MBlockExpectRace,
   MBlockSignal,
+  MBlockFD,
 
   // This must be the last.
   MBlockTypeCount
diff --git a/lib/tsan/rtl/tsan_mutex.cc b/lib/tsan/rtl/tsan_mutex.cc
index ca9b108..335ca22 100644
--- a/lib/tsan/rtl/tsan_mutex.cc
+++ b/lib/tsan/rtl/tsan_mutex.cc
@@ -25,23 +25,28 @@
 // then Report mutex can be locked while under Threads mutex.
 // The leaf mutexes can be locked under any other mutexes.
 // Recursive locking is not supported.
+#if TSAN_DEBUG && !TSAN_GO
 const MutexType MutexTypeLeaf = (MutexType)-1;
 static MutexType CanLockTab[MutexTypeCount][MutexTypeCount] = {
-  /*0 MutexTypeInvalid*/     {},
-  /*1 MutexTypeTrace*/       {MutexTypeLeaf},
-  /*2 MutexTypeThreads*/     {MutexTypeReport},
-  /*3 MutexTypeReport*/      {MutexTypeSyncTab, MutexTypeMBlock},
-  /*4 MutexTypeSyncVar*/     {},
-  /*5 MutexTypeSyncTab*/     {MutexTypeSyncVar},
-  /*6 MutexTypeSlab*/        {MutexTypeLeaf},
-  /*7 MutexTypeAnnotations*/ {},
-  /*8 MutexTypeAtExit*/      {MutexTypeSyncTab},
-  /*9 MutexTypeMBlock*/      {MutexTypeSyncVar},
+  /*0  MutexTypeInvalid*/     {},
+  /*1  MutexTypeTrace*/       {MutexTypeLeaf},
+  /*2  MutexTypeThreads*/     {MutexTypeReport},
+  /*3  MutexTypeReport*/      {MutexTypeSyncTab, MutexTypeMBlock,
+                               MutexTypeJavaMBlock},
+  /*4  MutexTypeSyncVar*/     {},
+  /*5  MutexTypeSyncTab*/     {MutexTypeSyncVar},
+  /*6  MutexTypeSlab*/        {MutexTypeLeaf},
+  /*7  MutexTypeAnnotations*/ {},
+  /*8  MutexTypeAtExit*/      {MutexTypeSyncTab},
+  /*9  MutexTypeMBlock*/      {MutexTypeSyncVar},
+  /*10 MutexTypeJavaMBlock*/  {MutexTypeSyncVar},
 };
 
 static bool CanLockAdj[MutexTypeCount][MutexTypeCount];
+#endif
 
 void InitializeMutex() {
+#if TSAN_DEBUG && !TSAN_GO
   // Build the "can lock" adjacency matrix.
   // If [i][j]==true, then one can lock mutex j while under mutex i.
   const int N = MutexTypeCount;
@@ -115,12 +120,14 @@
       Die();
     }
   }
+#endif
 }
 
 DeadlockDetector::DeadlockDetector() {
   // Rely on zero initialization because some mutexes can be locked before ctor.
 }
 
+#if TSAN_DEBUG && !TSAN_GO
 void DeadlockDetector::Lock(MutexType t) {
   // Printf("LOCK %d @%zu\n", t, seq_ + 1);
   CHECK_GT(t, MutexTypeInvalid);
@@ -153,6 +160,7 @@
   CHECK(locked_[t]);
   locked_[t] = 0;
 }
+#endif
 
 const uptr kUnlocked = 0;
 const uptr kWriteLock = 1;
diff --git a/lib/tsan/rtl/tsan_mutex.h b/lib/tsan/rtl/tsan_mutex.h
index 68b33a7..a2b4891 100644
--- a/lib/tsan/rtl/tsan_mutex.h
+++ b/lib/tsan/rtl/tsan_mutex.h
@@ -30,6 +30,7 @@
   MutexTypeAnnotations,
   MutexTypeAtExit,
   MutexTypeMBlock,
+  MutexTypeJavaMBlock,
 
   // This must be the last.
   MutexTypeCount
diff --git a/lib/tsan/rtl/tsan_platform.h b/lib/tsan/rtl/tsan_platform.h
index 4b7abb5..87b41d9 100644
--- a/lib/tsan/rtl/tsan_platform.h
+++ b/lib/tsan/rtl/tsan_platform.h
@@ -137,7 +137,6 @@
 
 const char *InitializePlatform();
 void FinalizePlatform();
-void MapThreadTrace(uptr addr, uptr size);
 uptr ALWAYS_INLINE INLINE GetThreadTrace(int tid) {
   uptr p = kTraceMemBegin + (uptr)tid * kTraceSize * sizeof(Event);
   DCHECK_LT(p, kTraceMemBegin + kTraceMemSize);
@@ -152,6 +151,7 @@
 uptr GetTlsSize();
 void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
                           uptr *tls_addr, uptr *tls_size);
+int ExtractResolvFDs(void *state, int *fds, int nfd);
 
 }  // namespace __tsan
 
diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
index 0a2ec3c..f756cbc 100644
--- a/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/lib/tsan/rtl/tsan_platform_linux.cc
@@ -40,6 +40,8 @@
 #include <errno.h>
 #include <sched.h>
 #include <dlfcn.h>
+#define __need_res_state
+#include <resolv.h>
 
 extern "C" int arch_prctl(int code, __sanitizer::uptr *addr);
 
@@ -71,9 +73,7 @@
 }
 
 void FlushShadowMemory() {
-  madvise((void*)kLinuxShadowBeg,
-          kLinuxShadowEnd - kLinuxShadowBeg,
-          MADV_DONTNEED);
+  FlushUnneededShadowMemory(kLinuxShadowBeg, kLinuxShadowEnd - kLinuxShadowBeg);
 }
 
 #ifndef TSAN_GO
@@ -120,16 +120,6 @@
 }
 #endif
 
-void MapThreadTrace(uptr addr, uptr size) {
-  DPrintf("Mapping trace at %p-%p(0x%zx)\n", addr, addr + size, size);
-  CHECK_GE(addr, kTraceMemBegin);
-  CHECK_LE(addr + size, kTraceMemBegin + kTraceMemSize);
-  if (addr != (uptr)MmapFixedNoReserve(addr, size)) {
-    Printf("FATAL: ThreadSanitizer can not mmap thread trace\n");
-    Die();
-  }
-}
-
 static uptr g_data_start;
 static uptr g_data_end;
 
@@ -182,18 +172,14 @@
 #else
 # define INTERNAL_FUNCTION
 #endif
-extern "C" void _dl_get_tls_static_info(size_t*, size_t*)
-    __attribute__((weak)) INTERNAL_FUNCTION;
 
 static int InitTlsSize() {
   typedef void (*get_tls_func)(size_t*, size_t*) INTERNAL_FUNCTION;
-  get_tls_func get_tls = &_dl_get_tls_static_info;
-  if (get_tls == 0) {
-    void *get_tls_static_info_ptr = dlsym(RTLD_NEXT, "_dl_get_tls_static_info");
-    CHECK_EQ(sizeof(get_tls), sizeof(get_tls_static_info_ptr));
-    internal_memcpy(&get_tls, &get_tls_static_info_ptr,
-                    sizeof(get_tls_static_info_ptr));
-  }
+  get_tls_func get_tls;
+  void *get_tls_static_info_ptr = dlsym(RTLD_NEXT, "_dl_get_tls_static_info");
+  CHECK_EQ(sizeof(get_tls), sizeof(get_tls_static_info_ptr));
+  internal_memcpy(&get_tls, &get_tls_static_info_ptr,
+                  sizeof(get_tls_static_info_ptr));
   CHECK_NE(get_tls, 0);
   size_t tls_size = 0;
   size_t tls_align = 0;
@@ -222,36 +208,42 @@
     // Disable core dumps, dumping of 16TB usually takes a bit long.
     setlim(RLIMIT_CORE, 0);
   }
-  bool reexec = false;
-  // TSan doesn't play well with unlimited stack size (as stack
-  // overlaps with shadow memory). If we detect unlimited stack size,
-  // we re-exec the program with limited stack size as a best effort.
-  if (getlim(RLIMIT_STACK) == (rlim_t)-1) {
-    const uptr kMaxStackSize = 32 * 1024 * 1024;
-    Report("WARNING: Program is run with unlimited stack size, which "
-           "wouldn't work with ThreadSanitizer.\n");
-    Report("Re-execing with stack size limited to %zd bytes.\n", kMaxStackSize);
-    SetStackSizeLimitInBytes(kMaxStackSize);
-    reexec = true;
-  }
 
-  if (getlim(RLIMIT_AS) != (rlim_t)-1) {
-    Report("WARNING: Program is run with limited virtual address space, which "
-           "wouldn't work with ThreadSanitizer.\n");
-    Report("Re-execing with unlimited virtual address space.\n");
-    setlim(RLIMIT_AS, -1);
-    reexec = true;
-  }
+  // Go maps shadow memory lazily and works fine with limited address space.
+  // Unlimited stack is not a problem either, because the executable
+  // is not compiled with -pie.
+  if (kCppMode) {
+    bool reexec = false;
+    // TSan doesn't play well with unlimited stack size (as stack
+    // overlaps with shadow memory). If we detect unlimited stack size,
+    // we re-exec the program with limited stack size as a best effort.
+    if (getlim(RLIMIT_STACK) == (rlim_t)-1) {
+      const uptr kMaxStackSize = 32 * 1024 * 1024;
+      Report("WARNING: Program is run with unlimited stack size, which "
+             "wouldn't work with ThreadSanitizer.\n");
+      Report("Re-execing with stack size limited to %zd bytes.\n",
+             kMaxStackSize);
+      SetStackSizeLimitInBytes(kMaxStackSize);
+      reexec = true;
+    }
 
-  if (reexec)
-    ReExec();
+    if (getlim(RLIMIT_AS) != (rlim_t)-1) {
+      Report("WARNING: Program is run with limited virtual address space,"
+             " which wouldn't work with ThreadSanitizer.\n");
+      Report("Re-execing with unlimited virtual address space.\n");
+      setlim(RLIMIT_AS, -1);
+      reexec = true;
+    }
+    if (reexec)
+      ReExec();
+  }
 
 #ifndef TSAN_GO
   CheckPIE();
   g_tls_size = (uptr)InitTlsSize();
   InitDataSeg();
 #endif
-  return getenv(kTsanOptionsEnv);
+  return GetEnv(kTsanOptionsEnv);
 }
 
 void FinalizePlatform() {
@@ -299,6 +291,19 @@
   return g_data_start && addr >= g_data_start && addr < g_data_end;
 }
 
+#ifndef TSAN_GO
+int ExtractResolvFDs(void *state, int *fds, int nfd) {
+  // Stores into fds the socket of each slot that has an address and a
+  // socket other than -1, stopping at nfd entries; returns how many.
+  __res_state *res = (__res_state*)state; int found = 0;
+  for (int ns = 0; ns < MAXNS && found < nfd; ns++)
+    if (res->_u._ext.nsaddrs[ns] && res->_u._ext.nssocks[ns] != -1)
+      fds[found++] = res->_u._ext.nssocks[ns];
+  return found;
+}
+#endif
+
+
 }  // namespace __tsan
 
 #endif  // #ifdef __linux__
diff --git a/lib/tsan/rtl/tsan_platform_mac.cc b/lib/tsan/rtl/tsan_platform_mac.cc
index 183061d..fb00742 100644
--- a/lib/tsan/rtl/tsan_platform_mac.cc
+++ b/lib/tsan/rtl/tsan_platform_mac.cc
@@ -82,7 +82,7 @@
     setrlimit(RLIMIT_CORE, (rlimit*)&lim);
   }
 
-  return getenv(kTsanOptionsEnv);
+  return GetEnv(kTsanOptionsEnv);
 }
 
 void FinalizePlatform() {
diff --git a/lib/tsan/rtl/tsan_platform_windows.cc b/lib/tsan/rtl/tsan_platform_windows.cc
index f23e84e..60a59c7 100644
--- a/lib/tsan/rtl/tsan_platform_windows.cc
+++ b/lib/tsan/rtl/tsan_platform_windows.cc
@@ -34,7 +34,7 @@
 }
 
 const char *InitializePlatform() {
-  return getenv(kTsanOptionsEnv);
+  return GetEnv(kTsanOptionsEnv);
 }
 
 void FinalizePlatform() {
diff --git a/lib/tsan/rtl/tsan_report.cc b/lib/tsan/rtl/tsan_report.cc
index af8235a..f52f456 100644
--- a/lib/tsan/rtl/tsan_report.cc
+++ b/lib/tsan/rtl/tsan_report.cc
@@ -35,23 +35,28 @@
 
 #ifndef TSAN_GO
 
-static void PrintHeader(ReportType typ) {
-  Printf("WARNING: ThreadSanitizer: ");
+const int kThreadBufSize = 32;  // Capacity thread_name() assumes for buf.
+const char *thread_name(char *buf, int tid) {
+  if (tid == 0)
+    return "main thread";  // String literal; buf is left untouched.
+  internal_snprintf(buf, kThreadBufSize, "thread T%d", tid);
+  return buf;  // Same pointer the caller passed in, now holding the label.
+}
 
+static const char *ReportTypeString(ReportType typ) {
   if (typ == ReportTypeRace)
-    Printf("data race");
-  else if (typ == ReportTypeUseAfterFree)
-    Printf("heap-use-after-free");
-  else if (typ == ReportTypeThreadLeak)
-    Printf("thread leak");
-  else if (typ == ReportTypeMutexDestroyLocked)
-    Printf("destroy of a locked mutex");
-  else if (typ == ReportTypeSignalUnsafe)
-    Printf("signal-unsafe call inside of a signal");
-  else if (typ == ReportTypeErrnoInSignal)
-    Printf("signal handler spoils errno");
-
-  Printf(" (pid=%d)\n", GetPid());
+    return "data race";
+  if (typ == ReportTypeUseAfterFree)
+    return "heap-use-after-free";
+  if (typ == ReportTypeThreadLeak)
+    return "thread leak";
+  if (typ == ReportTypeMutexDestroyLocked)
+    return "destroy of a locked mutex";
+  if (typ == ReportTypeSignalUnsafe)
+    return "signal-unsafe call inside of a signal";
+  if (typ == ReportTypeErrnoInSignal)
+    return "signal handler spoils errno";
+  return "";
 }
 
 void PrintStack(const ReportStack *ent) {
@@ -81,35 +86,42 @@
   }
 }
 
+static const char *MopDesc(bool first, bool write, bool atomic) {
+  if (atomic && first) return write ? "Atomic write" : "Atomic read";
+  if (atomic) return write ? "Previous atomic write" : "Previous atomic read";
+  if (first) return write ? "Write" : "Read";
+  return write ? "Previous write" : "Previous read";
+}
+
 static void PrintMop(const ReportMop *mop, bool first) {
-  Printf("  %s of size %d at %p",
-      (first ? (mop->write ? "Write" : "Read")
-             : (mop->write ? "Previous write" : "Previous read")),
-      mop->size, (void*)mop->addr);
-  if (mop->tid == 0)
-    Printf(" by main thread");
-  else
-    Printf(" by thread T%d", mop->tid);
+  char thrbuf[kThreadBufSize];
+  Printf("  %s of size %d at %p by %s",
+      MopDesc(first, mop->write, mop->atomic),
+      mop->size, (void*)mop->addr,
+      thread_name(thrbuf, mop->tid));
   PrintMutexSet(mop->mset);
   Printf(":\n");
   PrintStack(mop->stack);
 }
 
 static void PrintLocation(const ReportLocation *loc) {
+  char thrbuf[kThreadBufSize];  // Scratch space for thread_name().
   if (loc->type == ReportLocationGlobal) {
-    Printf("  Location is global '%s' of size %zu at %zx %s:%d (%s+%p)\n\n",
-               loc->name, loc->size, loc->addr, loc->file, loc->line,
-               loc->module, loc->offset);
+    Printf("  Location is global '%s' of size %zu at %zx (%s+%p)\n\n",
+               loc->name, loc->size, loc->addr, loc->module, loc->offset);
   } else if (loc->type == ReportLocationHeap) {
-    Printf("  Location is heap block of size %zu at %p allocated",
-        loc->size, loc->addr);
-    if (loc->tid == 0)
-      Printf(" by main thread:\n");
-    else
-      Printf(" by thread T%d:\n", loc->tid);
+    // Reuses the function-level thrbuf for the allocating thread's name.
+    Printf("  Location is heap block of size %zu at %p allocated by %s:\n",
+        loc->size, loc->addr, thread_name(thrbuf, loc->tid));
     PrintStack(loc->stack);
   } else if (loc->type == ReportLocationStack) {
-    Printf("  Location is stack of thread T%d:\n\n", loc->tid);
+    Printf("  Location is stack of %s.\n\n", thread_name(thrbuf, loc->tid));
+  } else if (loc->type == ReportLocationTLS) {
+    Printf("  Location is TLS of %s.\n\n", thread_name(thrbuf, loc->tid));
+  } else if (loc->type == ReportLocationFD) {
+    Printf("  Location is file descriptor %d created by %s at:\n",
+        loc->fd, thread_name(thrbuf, loc->tid));
+    PrintStack(loc->stack);
   }
 }
 
@@ -128,9 +140,12 @@
   Printf("  Thread T%d", rt->id);
   if (rt->name)
     Printf(" '%s'", rt->name);
-  Printf(" (tid=%zu, %s)", rt->pid, rt->running ? "running" : "finished");
+  char thrbuf[kThreadBufSize];
+  Printf(" (tid=%zu, %s) created by %s",
+    rt->pid, rt->running ? "running" : "finished",
+    thread_name(thrbuf, rt->parent_tid));
   if (rt->stack)
-    Printf(" created at:");
+    Printf(" at:");
   Printf("\n");
   PrintStack(rt->stack);
 }
@@ -140,9 +155,28 @@
   PrintStack(s);
 }
 
+static ReportStack *ChooseSummaryStack(const ReportDesc *rep) {
+  if (rep->mops.Size())  // First choice: stack of the first memory access.
+    return rep->mops[0]->stack;
+  if (rep->stacks.Size())  // Then the first standalone stack.
+    return rep->stacks[0];
+  if (rep->mutexes.Size())  // Then the stack of the first mutex.
+    return rep->mutexes[0]->stack;
+  if (rep->threads.Size())  // Finally the stack of the first thread.
+    return rep->threads[0]->stack;
+  return 0;  // All four lists are empty.
+}
+
+ReportStack *SkipTsanInternalFrames(ReportStack *ent) {
+  // Step past internal frames, but never move beyond the last frame.
+  for (; FrameIsInternal(ent) && ent->next; ent = ent->next) {}
+  return ent;
+}
+
 void PrintReport(const ReportDesc *rep) {
   Printf("==================\n");
-  PrintHeader(rep->typ);
+  const char *rep_typ_str = ReportTypeString(rep->typ);
+  Printf("WARNING: ThreadSanitizer: %s (pid=%d)\n", rep_typ_str, GetPid());
 
   for (uptr i = 0; i < rep->stacks.Size(); i++) {
     if (i)
@@ -165,6 +199,9 @@
   for (uptr i = 0; i < rep->threads.Size(); i++)
     PrintThread(rep->threads[i]);
 
+  if (ReportStack *ent = SkipTsanInternalFrames(ChooseSummaryStack(rep)))
+    ReportErrorSummary(rep_typ_str, ent->file, ent->line, ent->func);
+
   Printf("==================\n");
 }
 
diff --git a/lib/tsan/rtl/tsan_report.h b/lib/tsan/rtl/tsan_report.h
index 2c3667e..b2b7b53 100644
--- a/lib/tsan/rtl/tsan_report.h
+++ b/lib/tsan/rtl/tsan_report.h
@@ -48,6 +48,7 @@
   uptr addr;
   int size;
   bool write;
+  bool atomic;
   Vector<ReportMopMutex> mset;
   ReportStack *stack;
 
@@ -57,7 +58,9 @@
 enum ReportLocationType {
   ReportLocationGlobal,
   ReportLocationHeap,
-  ReportLocationStack
+  ReportLocationStack,
+  ReportLocationTLS,
+  ReportLocationFD
 };
 
 struct ReportLocation {
@@ -67,6 +70,7 @@
   char *module;
   uptr offset;
   int tid;
+  int fd;
   char *name;
   char *file;
   int line;
@@ -78,6 +82,7 @@
   uptr pid;
   bool running;
   char *name;
+  int parent_tid;
   ReportStack *stack;
 };
 
diff --git a/lib/tsan/rtl/tsan_rtl.cc b/lib/tsan/rtl/tsan_rtl.cc
index 7bb090d..e533a9c 100644
--- a/lib/tsan/rtl/tsan_rtl.cc
+++ b/lib/tsan/rtl/tsan_rtl.cc
@@ -37,6 +37,11 @@
 #endif
 static char ctx_placeholder[sizeof(Context)] ALIGNED(64);
 
+// Can be overridden by a front-end.
+bool CPP_WEAK OnFinalize(bool failed) {
+  return failed;  // Default: pass the failure status through unchanged.
+}
+
 static Context *ctx;
 Context *CTX() {
   return ctx;
@@ -138,7 +143,7 @@
   InternalScopedBuffer<char> filename(4096);
   internal_snprintf(filename.data(), filename.size(), "%s.%d",
       flags()->profile_memory, GetPid());
-  fd_t fd = internal_open(filename.data(), true);
+  fd_t fd = OpenFile(filename.data(), true);
   if (fd == kInvalidFd) {
     Printf("Failed to open memory profile file '%s'\n", &filename[0]);
     Die();
@@ -166,12 +171,23 @@
   MmapFixedNoReserve(MemToShadow(addr), size * kShadowMultiplier);
 }
 
+void MapThreadTrace(uptr addr, uptr size) {
+  DPrintf("#0: Mapping trace at %p-%p(0x%zx)\n", addr, addr + size, size);
+  CHECK_GE(addr, kTraceMemBegin);
+  CHECK_LE(addr + size, kTraceMemBegin + kTraceMemSize);
+  const uptr mapped = (uptr)MmapFixedNoReserve(addr, size);
+  if (mapped == addr) return;  // Mapped exactly where requested.
+  Printf("FATAL: ThreadSanitizer can not mmap thread trace\n");
+  Die();
+}
+
 void Initialize(ThreadState *thr) {
   // Thread safe because done before all threads exist.
   static bool is_initialized = false;
   if (is_initialized)
     return;
   is_initialized = true;
+  SanitizerToolName = "ThreadSanitizer";
   // Install tool-specific callbacks in sanitizer_common.
   SetCheckFailedCallback(TsanCheckFailed);
 
@@ -229,7 +245,7 @@
     Printf("ThreadSanitizer is suspended at startup (pid %d)."
            " Call __tsan_resume().\n",
            GetPid());
-    while (__tsan_resumed == 0);
+    while (__tsan_resumed == 0) {}
   }
 }
 
@@ -245,6 +261,11 @@
   ctx->report_mtx.Lock();
   ctx->report_mtx.Unlock();
 
+#ifndef TSAN_GO
+  if (ctx->flags.verbosity)
+    AllocatorPrintStats();
+#endif
+
   ThreadFinalize(thr);
 
   if (ctx->nreported) {
@@ -262,6 +283,8 @@
         ctx->nmissed_expected);
   }
 
+  failed = OnFinalize(failed);
+
   StatAggregate(ctx->stat, thr->stat);
   StatOutput(ctx->stat);
   return failed ? flags()->exitcode : 0;
@@ -348,18 +371,6 @@
 #endif
 }
 
-static inline bool BothReads(Shadow s, int kAccessIsWrite) {
-  return !kAccessIsWrite && !s.is_write();
-}
-
-static inline bool OldIsRWNotWeaker(Shadow old, int kAccessIsWrite) {
-  return old.is_write() || !kAccessIsWrite;
-}
-
-static inline bool OldIsRWWeakerOrEqual(Shadow old, int kAccessIsWrite) {
-  return !old.is_write() || kAccessIsWrite;
-}
-
 static inline bool OldIsInSameSynchEpoch(Shadow old, ThreadState *thr) {
   return old.epoch() >= thr->fast_synch_epoch;
 }
@@ -370,7 +381,7 @@
 
 ALWAYS_INLINE
 void MemoryAccessImpl(ThreadState *thr, uptr addr,
-    int kAccessSizeLog, bool kAccessIsWrite,
+    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
     u64 *shadow_mem, Shadow cur) {
   StatInc(thr, StatMop);
   StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
@@ -444,7 +455,7 @@
 
 ALWAYS_INLINE
 void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
-    int kAccessSizeLog, bool kAccessIsWrite) {
+    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
   u64 *shadow_mem = (u64*)MemToShadow(addr);
   DPrintf2("#%d: MemoryAccess: @%p %p size=%d"
       " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
@@ -471,12 +482,13 @@
   Shadow cur(fast_state);
   cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
   cur.SetWrite(kAccessIsWrite);
+  cur.SetAtomic(kIsAtomic);
 
   // We must not store to the trace if we do not store to the shadow.
   // That is, this call must be moved somewhere below.
   TraceAddEvent(thr, fast_state, EventTypeMop, pc);
 
-  MemoryAccessImpl(thr, addr, kAccessSizeLog, kAccessIsWrite,
+  MemoryAccessImpl(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
       shadow_mem, cur);
 }
 
@@ -523,7 +535,10 @@
 }
 
 void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+  CHECK_EQ(thr->is_freeing, false);
+  thr->is_freeing = true;
   MemoryAccessRange(thr, pc, addr, size, true);
+  thr->is_freeing = false;
   Shadow s(thr->fast_state);
   s.ClearIgnoreBit();
   s.MarkAsFreed();
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
index 5d74286..6452636 100644
--- a/lib/tsan/rtl/tsan_rtl.h
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -65,10 +65,22 @@
 #endif
 const uptr kAllocatorSize  =  0x10000000000ULL;  // 1T.
 
+struct TsanMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const { }  // No work is done on map.
+  void OnUnmap(uptr p, uptr size) const {
+    // We are about to unmap a chunk of user memory.
+    // Mark the corresponding shadow memory as not needed.
+    uptr shadow_beg = MemToShadow(p);
+    uptr shadow_end = MemToShadow(p + size);  // Shadow of the end address.
+    CHECK(IsAligned(shadow_end|shadow_beg, GetPageSizeCached()));
+    FlushUnneededShadowMemory(shadow_beg, shadow_end - shadow_beg);
+  }
+};
+
 typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, sizeof(MBlock),
     DefaultSizeClassMap> PrimaryAllocator;
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
-typedef LargeMmapAllocator SecondaryAllocator;
+typedef LargeMmapAllocator<TsanMapUnmapCallback> SecondaryAllocator;
 typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
     SecondaryAllocator> Allocator;
 Allocator *allocator();
@@ -161,7 +173,8 @@
 //   freed           : 1
 //   tid             : kTidBits
 //   epoch           : kClkBits
-//   is_write        : 1
+//   is_atomic       : 1
+//   is_read         : 1
 //   size_log        : 2
 //   addr0           : 3
 class Shadow : public FastState {
@@ -185,13 +198,26 @@
   }
 
   void SetWrite(unsigned kAccessIsWrite) {
-    DCHECK_EQ(x_ & 32, 0);
-    if (kAccessIsWrite)
-      x_ |= 32;
-    DCHECK_EQ(kAccessIsWrite, is_write());
+    DCHECK_EQ(x_ & kReadBit, 0);
+    if (!kAccessIsWrite)
+      x_ |= kReadBit;
+    DCHECK_EQ(kAccessIsWrite, IsWrite());
   }
 
-  bool IsZero() const { return x_ == 0; }
+  void SetAtomic(bool kIsAtomic) {
+    DCHECK(!IsAtomic());  // The atomic bit must not be set yet.
+    if (kIsAtomic)
+      x_ |= kAtomicBit;
+    DCHECK_EQ(IsAtomic(), kIsAtomic);  // Bit now mirrors the argument.
+  }
+
+  bool IsAtomic() const {
+    return (x_ & kAtomicBit) != 0;
+  }
+
+  bool IsZero() const {
+    return x_ == 0;
+  }
 
   static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) {
     u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift;
@@ -238,7 +264,8 @@
   }
   u64 addr0() const { return x_ & 7; }
   u64 size() const { return 1ull << size_log(); }
-  bool is_write() const { return x_ & 32; }
+  bool IsWrite() const { return !IsRead(); }
+  bool IsRead() const { return x_ & kReadBit; }
 
   // The idea behind the freed bit is as follows.
   // When the memory is freed (or otherwise unaccessible) we write to the shadow
@@ -253,13 +280,46 @@
      x_ |= kFreedBit;
   }
 
+  bool IsFreed() const {
+    return (x_ & kFreedBit) != 0;
+  }
+
   bool GetFreedAndReset() {
     bool res = x_ & kFreedBit;
     x_ &= ~kFreedBit;
     return res;
   }
 
+  bool IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
+    // True if old and new are both reads (bit 5) or both atomic (bit 6).
+    bool v = x_ & u64(((kIsWrite ^ 1) << kReadShift)
+        | (kIsAtomic << kAtomicShift));
+    DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic));
+    return v;
+  }
+
+  bool IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
+    bool v = ((x_ >> kReadShift) & 3)  // 2-bit value: is_atomic:is_read
+        <= u64((kIsWrite ^ 1) | (kIsAtomic << 1));  // same encoding, new access
+    DCHECK_EQ(v, (IsAtomic() < kIsAtomic) ||  // reference formulation
+        (IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite));
+    return v;
+  }
+
+  bool IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
+    bool v = ((x_ >> kReadShift) & 3)  // 2-bit value: is_atomic:is_read
+        >= u64((kIsWrite ^ 1) | (kIsAtomic << 1));  // same encoding, new access
+    DCHECK_EQ(v, (IsAtomic() > kIsAtomic) ||  // reference formulation
+        (IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite));
+    return v;
+  }
+
  private:
+  static const u64 kReadShift   = 5;
+  static const u64 kReadBit     = 1ull << kReadShift;
+  static const u64 kAtomicShift = 6;
+  static const u64 kAtomicBit   = 1ull << kAtomicShift;
+
   u64 size_log() const { return (x_ >> 3) & 3; }
 
   static bool TwoRangesIntersectSLOW(const Shadow s1, const Shadow s2) {
@@ -314,7 +374,9 @@
   const int tid;
   const int unique_id;
   int in_rtl;
+  bool in_symbolizer;
   bool is_alive;
+  bool is_freeing;
   const uptr stk_addr;
   const uptr stk_size;
   const uptr tls_addr;
@@ -377,6 +439,7 @@
   u64 epoch0;
   u64 epoch1;
   StackTrace creation_stack;
+  int creation_tid;
   ThreadDeadInfo *dead_info;
   ThreadContext *dead_next;  // In dead thread list.
   char *name;  // As annotated by user.
@@ -482,6 +545,7 @@
 }
 
 void MapShadow(uptr addr, uptr size);
+void MapThreadTrace(uptr addr, uptr size);
 void InitializeShadowMemory();
 void InitializeInterceptors();
 void InitializeDynamicAnnotations();
@@ -489,11 +553,14 @@
 void ReportRace(ThreadState *thr);
 bool OutputReport(Context *ctx,
                   const ScopedReport &srep,
-                  const ReportStack *suppress_stack = 0);
+                  const ReportStack *suppress_stack1 = 0,
+                  const ReportStack *suppress_stack2 = 0);
 bool IsFiredSuppression(Context *ctx,
                         const ScopedReport &srep,
                         const StackTrace &trace);
 bool IsExpectedReport(uptr addr, uptr size);
+bool FrameIsInternal(const ReportStack *frame);
+ReportStack *SkipTsanInternalFrames(ReportStack *ent);
 
 #if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 1
 # define DPrintf Printf
@@ -509,21 +576,50 @@
 
 u32 CurrentStackId(ThreadState *thr, uptr pc);
 void PrintCurrentStack(ThreadState *thr, uptr pc);
+void PrintCurrentStackSlow();  // uses libunwind
 
 void Initialize(ThreadState *thr);
 int Finalize(ThreadState *thr);
 
+SyncVar* GetJavaSync(ThreadState *thr, uptr pc, uptr addr,
+                     bool write_lock, bool create);
+SyncVar* GetAndRemoveJavaSync(ThreadState *thr, uptr pc, uptr addr);
+
 void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
-    int kAccessSizeLog, bool kAccessIsWrite);
+    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic);
 void MemoryAccessImpl(ThreadState *thr, uptr addr,
-    int kAccessSizeLog, bool kAccessIsWrite,
+    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
     u64 *shadow_mem, Shadow cur);
-void MemoryRead1Byte(ThreadState *thr, uptr pc, uptr addr);
-void MemoryWrite1Byte(ThreadState *thr, uptr pc, uptr addr);
-void MemoryRead8Byte(ThreadState *thr, uptr pc, uptr addr);
-void MemoryWrite8Byte(ThreadState *thr, uptr pc, uptr addr);
 void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
-                       uptr size, bool is_write);
+    uptr size, bool is_write);
+void MemoryAccessRangeStep(ThreadState *thr, uptr pc, uptr addr,
+    uptr size, uptr step, bool is_write);
+
+const int kSizeLog1 = 0;
+const int kSizeLog2 = 1;
+const int kSizeLog4 = 2;
+const int kSizeLog8 = 3;
+
+void ALWAYS_INLINE INLINE MemoryRead(ThreadState *thr, uptr pc,
+                                     uptr addr, int kAccessSizeLog) {
+  MemoryAccess(thr, pc, addr, kAccessSizeLog, false, false);  // plain read
+}
+
+void ALWAYS_INLINE INLINE MemoryWrite(ThreadState *thr, uptr pc,
+                                      uptr addr, int kAccessSizeLog) {
+  MemoryAccess(thr, pc, addr, kAccessSizeLog, true, false);  // plain write
+}
+
+void ALWAYS_INLINE INLINE MemoryReadAtomic(ThreadState *thr, uptr pc,
+                                           uptr addr, int kAccessSizeLog) {
+  MemoryAccess(thr, pc, addr, kAccessSizeLog, false, true);  // atomic read
+}
+
+void ALWAYS_INLINE INLINE MemoryWriteAtomic(ThreadState *thr, uptr pc,
+                                            uptr addr, int kAccessSizeLog) {
+  MemoryAccess(thr, pc, addr, kAccessSizeLog, true, true);  // atomic write
+}
+
 void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size);
 void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size);
 void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);
diff --git a/lib/tsan/rtl/tsan_rtl_mutex.cc b/lib/tsan/rtl/tsan_rtl_mutex.cc
index e5c61d0..a07f6a2 100644
--- a/lib/tsan/rtl/tsan_rtl_mutex.cc
+++ b/lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -26,8 +26,12 @@
   CHECK_GT(thr->in_rtl, 0);
   DPrintf("#%d: MutexCreate %zx\n", thr->tid, addr);
   StatInc(thr, StatMutexCreate);
-  if (!linker_init && IsAppMem(addr))
-    MemoryWrite1Byte(thr, pc, addr);
+  if (!linker_init && IsAppMem(addr)) {
+    CHECK(!thr->is_freeing);
+    thr->is_freeing = true;
+    MemoryWrite(thr, pc, addr, kSizeLog1);
+    thr->is_freeing = false;
+  }
   SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   s->is_rw = rw;
   s->is_recursive = recursive;
@@ -49,12 +53,17 @@
   SyncVar *s = ctx->synctab.GetAndRemove(thr, pc, addr);
   if (s == 0)
     return;
-  if (IsAppMem(addr))
-    MemoryWrite1Byte(thr, pc, addr);
+  if (IsAppMem(addr)) {
+    CHECK(!thr->is_freeing);
+    thr->is_freeing = true;
+    MemoryWrite(thr, pc, addr, kSizeLog1);
+    thr->is_freeing = false;
+  }
   if (flags()->report_destroy_locked
       && s->owner_tid != SyncVar::kInvalidTid
       && !s->is_broken) {
     s->is_broken = true;
+    Lock l(&ctx->thread_mtx);
     ScopedReport rep(ReportTypeMutexDestroyLocked);
     rep.AddMutex(s);
     StackTrace trace;
@@ -74,7 +83,7 @@
   CHECK_GT(thr->in_rtl, 0);
   DPrintf("#%d: MutexLock %zx\n", thr->tid, addr);
   if (IsAppMem(addr))
-    MemoryRead1Byte(thr, pc, addr);
+    MemoryReadAtomic(thr, pc, addr, kSizeLog1);
   SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId());
@@ -107,7 +116,7 @@
   CHECK_GT(thr->in_rtl, 0);
   DPrintf("#%d: MutexUnlock %zx\n", thr->tid, addr);
   if (IsAppMem(addr))
-    MemoryRead1Byte(thr, pc, addr);
+    MemoryReadAtomic(thr, pc, addr, kSizeLog1);
   SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
@@ -145,7 +154,7 @@
   DPrintf("#%d: MutexReadLock %zx\n", thr->tid, addr);
   StatInc(thr, StatMutexReadLock);
   if (IsAppMem(addr))
-    MemoryRead1Byte(thr, pc, addr);
+    MemoryReadAtomic(thr, pc, addr, kSizeLog1);
   SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, false);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId());
@@ -166,7 +175,7 @@
   DPrintf("#%d: MutexReadUnlock %zx\n", thr->tid, addr);
   StatInc(thr, StatMutexReadUnlock);
   if (IsAppMem(addr))
-    MemoryRead1Byte(thr, pc, addr);
+    MemoryReadAtomic(thr, pc, addr, kSizeLog1);
   SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
@@ -187,7 +196,7 @@
   CHECK_GT(thr->in_rtl, 0);
   DPrintf("#%d: MutexReadOrWriteUnlock %zx\n", thr->tid, addr);
   if (IsAppMem(addr))
-    MemoryRead1Byte(thr, pc, addr);
+    MemoryReadAtomic(thr, pc, addr, kSizeLog1);
   SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   bool write = true;
   if (s->owner_tid == SyncVar::kInvalidTid) {
diff --git a/lib/tsan/rtl/tsan_rtl_report.cc b/lib/tsan/rtl/tsan_rtl_report.cc
index c4dcdfb..de9a0e9 100644
--- a/lib/tsan/rtl/tsan_rtl_report.cc
+++ b/lib/tsan/rtl/tsan_rtl_report.cc
@@ -15,6 +15,7 @@
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
 #include "tsan_platform.h"
 #include "tsan_rtl.h"
 #include "tsan_suppressions.h"
@@ -23,17 +24,21 @@
 #include "tsan_sync.h"
 #include "tsan_mman.h"
 #include "tsan_flags.h"
+#include "tsan_fd.h"
 
 namespace __tsan {
 
 using namespace __sanitizer;  // NOLINT
 
+static ReportStack *SymbolizeStack(const StackTrace& trace);
+
 void TsanCheckFailed(const char *file, int line, const char *cond,
                      u64 v1, u64 v2) {
   ScopedInRtl in_rtl;
   Printf("FATAL: ThreadSanitizer CHECK failed: "
          "%s:%d \"%s\" (0x%zx, 0x%zx)\n",
          file, line, cond, (uptr)v1, (uptr)v2);
+  PrintCurrentStackSlow();  // print this thread's stack before Die()
   Die();
 }
 
@@ -120,6 +125,7 @@
 
 ScopedReport::ScopedReport(ReportType typ) {
   ctx_ = CTX();
+  ctx_->thread_mtx.CheckLocked();
   void *mem = internal_alloc(MBlockReport, sizeof(ReportDesc));
   rep_ = new(mem) ReportDesc;
   rep_->typ = typ;
@@ -144,7 +150,8 @@
   mop->tid = s.tid();
   mop->addr = addr + s.addr0();
   mop->size = s.size();
-  mop->write = s.is_write();
+  mop->write = s.IsWrite();
+  mop->atomic = s.IsAtomic();
   mop->stack = SymbolizeStack(*stack);
   for (uptr i = 0; i < mset->Size(); i++) {
     MutexSet::Desc d = mset->Get(i);
@@ -180,20 +187,43 @@
   rt->pid = tctx->os_id;
   rt->running = (tctx->status == ThreadStatusRunning);
   rt->name = tctx->name ? internal_strdup(tctx->name) : 0;
+  rt->parent_tid = tctx->creation_tid;
   rt->stack = SymbolizeStack(tctx->creation_stack);
 }
 
 #ifndef TSAN_GO
 static ThreadContext *FindThread(int unique_id) {
-  CTX()->thread_mtx.CheckLocked();
+  Context *ctx = CTX();  // fetched once and reused for the whole scan
+  ctx->thread_mtx.CheckLocked();
   for (unsigned i = 0; i < kMaxTid; i++) {
-    ThreadContext *tctx = CTX()->threads[i];
+    ThreadContext *tctx = ctx->threads[i];
     if (tctx && tctx->unique_id == unique_id) {
       return tctx;
     }
   }
   return 0;
 }
+
+// Returns the running thread whose stack or TLS holds |addr|, or 0 if none.
+ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack) {
+  Context *ctx = CTX();
+  ctx->thread_mtx.CheckLocked();
+  for (unsigned idx = 0; idx < kMaxTid; idx++) {
+    ThreadContext *tctx = ctx->threads[idx];
+    if (tctx == 0 || tctx->status != ThreadStatusRunning)
+      continue;
+    ThreadState *thr = tctx->thr;
+    CHECK(thr);
+    const uptr stk_end = thr->stk_addr + thr->stk_size;
+    const uptr tls_end = thr->tls_addr + thr->tls_size;
+    const bool in_stack = addr >= thr->stk_addr && addr < stk_end;
+    if (in_stack || (addr >= thr->tls_addr && addr < tls_end)) {
+      *is_stack = in_stack;  // false means the match was the TLS range.
+      return tctx;
+    }
+  }
+  return 0;
+}
 #endif
 
 void ScopedReport::AddMutex(const SyncVar *s) {
@@ -226,6 +256,29 @@
   if (addr == 0)
     return;
 #ifndef TSAN_GO
+  int fd = -1;
+  int creat_tid = -1;
+  u32 creat_stack = 0;
+  if (FdLocation(addr, &fd, &creat_tid, &creat_stack)
+      || FdLocation(AlternativeAddress(addr), &fd, &creat_tid, &creat_stack)) {
+    void *mem = internal_alloc(MBlockReportLoc, sizeof(ReportLocation));
+    ReportLocation *loc = new(mem) ReportLocation();
+    rep_->locs.PushBack(loc);
+    loc->type = ReportLocationFD;
+    loc->fd = fd;
+    loc->tid = creat_tid;
+    uptr ssz = 0;
+    const uptr *stack = StackDepotGet(creat_stack, &ssz);
+    if (stack) {
+      StackTrace trace;
+      trace.Init(stack, ssz);
+      loc->stack = SymbolizeStack(trace);
+    }
+    ThreadContext *tctx = FindThread(creat_tid);
+    if (tctx)
+      AddThread(tctx);
+    return;
+  }
   if (allocator()->PointerIsMine((void*)addr)) {
     MBlock *b = user_mblock(0, (void*)addr);
     ThreadContext *tctx = FindThread(b->alloc_tid);
@@ -251,25 +304,21 @@
       AddThread(tctx);
     return;
   }
-#endif
-  ReportStack *symb = SymbolizeData(addr);
-  if (symb) {
+  bool is_stack = false;
+  if (ThreadContext *tctx = IsThreadStackOrTls(addr, &is_stack)) {
     void *mem = internal_alloc(MBlockReportLoc, sizeof(ReportLocation));
     ReportLocation *loc = new(mem) ReportLocation();
     rep_->locs.PushBack(loc);
-    loc->type = ReportLocationGlobal;
-    loc->addr = addr;
-    loc->size = size;
-    loc->module = symb->module ? internal_strdup(symb->module) : 0;
-    loc->offset = symb->offset;
-    loc->tid = 0;
-    loc->name = symb->func ? internal_strdup(symb->func) : 0;
-    loc->file = symb->file ? internal_strdup(symb->file) : 0;
-    loc->line = symb->line;
-    loc->stack = 0;
-    internal_free(symb);
+    loc->type = is_stack ? ReportLocationStack : ReportLocationTLS;
+    loc->tid = tctx->tid;
+    AddThread(tctx);
+  }
+  ReportLocation *loc = SymbolizeData(addr);
+  if (loc) {
+    rep_->locs.PushBack(loc);
     return;
   }
+#endif
 }
 
 #ifndef TSAN_GO
@@ -363,7 +412,7 @@
     uptr addr_min, uptr addr_max) {
   Context *ctx = CTX();
   bool equal_stack = false;
-  RacyStacks hash = {};
+  RacyStacks hash;
   if (flags()->suppress_equal_stacks) {
     hash.hash[0] = md5_hash(traces[0].Begin(), traces[0].Size() * sizeof(uptr));
     hash.hash[1] = md5_hash(traces[1].Begin(), traces[1].Size() * sizeof(uptr));
@@ -416,9 +465,12 @@
 
 bool OutputReport(Context *ctx,
                   const ScopedReport &srep,
-                  const ReportStack *suppress_stack) {
+                  const ReportStack *suppress_stack1,
+                  const ReportStack *suppress_stack2) {
   const ReportDesc *rep = srep.GetReport();
-  const uptr suppress_pc = IsSuppressed(rep->typ, suppress_stack);
+  uptr suppress_pc = IsSuppressed(rep->typ, suppress_stack1);
+  if (suppress_pc == 0)
+    suppress_pc = IsSuppressed(rep->typ, suppress_stack2);
   if (suppress_pc != 0) {
     FiredSuppression supp = {srep.GetReport()->typ, suppress_pc};
     ctx->fired_suppressions.PushBack(supp);
@@ -444,6 +496,13 @@
   return false;
 }
 
+bool FrameIsInternal(const ReportStack *frame) {  // matched by file name
+  if (frame == 0 || frame->file == 0) return false;
+  return internal_strstr(frame->file, "tsan_interceptors.cc") != 0
+      || internal_strstr(frame->file, "sanitizer_common_interceptors.inc") != 0
+      || internal_strstr(frame->file, "tsan_interface_") != 0;
+}
+
 // On programs that use Java we see weird reports like:
 // WARNING: ThreadSanitizer: data race (pid=22512)
 //   Read of size 8 at 0x7d2b00084318 by thread 100:
@@ -453,22 +512,20 @@
 //     #0 strncpy tsan_interceptors.cc:501 (foo+0x00000d8e0919)
 //     #1 <null> <null>:0 (0x7f7ad9b42707)
 static bool IsJavaNonsense(const ReportDesc *rep) {
+#ifndef TSAN_GO
   for (uptr i = 0; i < rep->mops.Size(); i++) {
     ReportMop *mop = rep->mops[i];
     ReportStack *frame = mop->stack;
-    if (frame != 0 && frame->func != 0
-        && (internal_strcmp(frame->func, "memset") == 0
-        || internal_strcmp(frame->func, "memcpy") == 0
-        || internal_strcmp(frame->func, "memmove") == 0
-        || internal_strcmp(frame->func, "strcmp") == 0
-        || internal_strcmp(frame->func, "strncpy") == 0
-        || internal_strcmp(frame->func, "strlen") == 0
-        || internal_strcmp(frame->func, "free") == 0
-        || internal_strcmp(frame->func, "pthread_mutex_lock") == 0)) {
+    if (frame == 0
+        || (frame->func == 0 && frame->file == 0 && frame->line == 0
+          && frame->module == 0)) {
+      return true;
+    }
+    if (FrameIsInternal(frame)) {
       frame = frame->next;
       if (frame == 0
           || (frame->func == 0 && frame->file == 0 && frame->line == 0
-            && frame->module == 0)) {
+          && frame->module == 0)) {
         if (frame) {
           FiredSuppression supp = {rep->typ, frame->pc};
           CTX()->fired_suppressions.PushBack(supp);
@@ -477,6 +534,20 @@
       }
     }
   }
+#endif
+  return false;
+}
+
+// False only when one access is atomic and the other is not freed/freeing.
+static bool RaceBetweenAtomicAndFree(ThreadState *thr) {
+  Shadow s0(thr->racy_state[0]), s1(thr->racy_state[1]);
+  const bool a0 = s0.IsAtomic();
+  const bool a1 = s1.IsAtomic();
+  CHECK(!(a0 && a1));
+  if (!a0 && !a1)
+    return true;
+  if ((a0 && s1.IsFreed()) || (a1 && thr->is_freeing))
+    return true;
   return false;
 }
 
@@ -485,6 +556,13 @@
     return;
   ScopedInRtl in_rtl;
 
+  if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr))
+    return;
+
+  if (thr->in_signal_handler)
+    Printf("ThreadSanitizer: printing report from signal handler."
+           " Can crash or hang.\n");
+
   bool freed = false;
   {
     Shadow s(thr->racy_state[1]);
@@ -551,7 +629,8 @@
   }
 #endif
 
-  if (!OutputReport(ctx, rep, rep.GetReport()->mops[0]->stack))
+  if (!OutputReport(ctx, rep, rep.GetReport()->mops[0]->stack,
+                              rep.GetReport()->mops[1]->stack))
     return;
 
   AddRacyStacks(thr, traces, addr_min, addr_max);
@@ -563,4 +642,16 @@
   PrintStack(SymbolizeStack(trace));
 }
 
+void PrintCurrentStackSlow() {  // empty body when TSAN_GO is defined
+#ifndef TSAN_GO
+  typedef __sanitizer::StackTrace SlowTrace;
+  void *mem = internal_alloc(MBlockStackTrace, sizeof(SlowTrace));
+  SlowTrace *unwound = new(mem) SlowTrace;
+  unwound->SlowUnwindStack(SlowTrace::GetCurrentPc(), kStackTraceMax);
+  StackTrace trace;
+  trace.Init(unwound->trace, unwound->size);
+  PrintStack(SymbolizeStack(trace));
+#endif
+}
+
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
index 2277a08..f25fb41 100644
--- a/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -156,6 +156,7 @@
     thr->clock.release(&tctx->sync);
     StatInc(thr, StatSyncRelease);
     tctx->creation_stack.ObtainCurrent(thr, pc);
+    tctx->creation_tid = thr->tid;
   }
   return tid;
 }
@@ -208,6 +209,9 @@
   thr->shadow_stack_pos = thr->shadow_stack;
   thr->shadow_stack_end = thr->shadow_stack + kInitStackSize;
 #endif
+#ifndef TSAN_GO
+  AllocatorThreadStart(thr);
+#endif
   tctx->thr = thr;
   thr->fast_synch_epoch = tctx->epoch0;
   thr->clock.set(tid, tctx->epoch0);
@@ -268,7 +272,7 @@
   tctx->epoch1 = thr->fast_state.epoch();
 
 #ifndef TSAN_GO
-  AlloctorThreadFinish(thr);
+  AllocatorThreadFinish(thr);
 #endif
   thr->~ThreadState();
   StatAggregate(ctx->stat, thr->stat);
@@ -393,7 +397,7 @@
     Shadow cur(fast_state);
     cur.SetWrite(is_write);
     cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
-    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write,
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false,
         shadow_mem, cur);
   }
   if (unaligned)
@@ -404,7 +408,7 @@
     Shadow cur(fast_state);
     cur.SetWrite(is_write);
     cur.SetAddr0AndSizeLog(0, kAccessSizeLog);
-    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write,
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false,
         shadow_mem, cur);
     shadow_mem += kShadowCnt;
   }
@@ -414,24 +418,30 @@
     Shadow cur(fast_state);
     cur.SetWrite(is_write);
     cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
-    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write,
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false,
         shadow_mem, cur);
   }
 }
 
-void MemoryRead1Byte(ThreadState *thr, uptr pc, uptr addr) {
-  MemoryAccess(thr, pc, addr, 0, 0);
-}
+void MemoryAccessRangeStep(ThreadState *thr, uptr pc, uptr addr,
+    uptr size, uptr step, bool is_write) {
+  if (size == 0 || step == 0)  // step == 0 would never advance addr below
+    return;
+  FastState fast_state = thr->fast_state;
+  if (fast_state.GetIgnoreBit())
+    return;
+  StatInc(thr, StatMopRange);
+  fast_state.IncrementEpoch();
+  thr->fast_state = fast_state;
+  TraceAddEvent(thr, fast_state, EventTypeMop, pc);
 
-void MemoryWrite1Byte(ThreadState *thr, uptr pc, uptr addr) {
-  MemoryAccess(thr, pc, addr, 0, 1);
-}
-
-void MemoryRead8Byte(ThreadState *thr, uptr pc, uptr addr) {
-  MemoryAccess(thr, pc, addr, 3, 0);
-}
-
-void MemoryWrite8Byte(ThreadState *thr, uptr pc, uptr addr) {
-  MemoryAccess(thr, pc, addr, 3, 1);
+  for (uptr addr_end = addr + size; addr < addr_end; addr += step) {
+    u64 *shadow_mem = (u64*)MemToShadow(addr);
+    Shadow cur(fast_state);
+    cur.SetWrite(is_write);
+    cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kSizeLog1);
+    MemoryAccessImpl(thr, addr, kSizeLog1, is_write, false,  // not atomic
+        shadow_mem, cur);
+  }
 }
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_stat.cc b/lib/tsan/rtl/tsan_stat.cc
index 6053d28..2fd3a69 100644
--- a/lib/tsan/rtl/tsan_stat.cc
+++ b/lib/tsan/rtl/tsan_stat.cc
@@ -181,22 +181,45 @@
   name[StatInt_sem_timedwait]            = "  sem_timedwait                   ";
   name[StatInt_sem_post]                 = "  sem_post                        ";
   name[StatInt_sem_getvalue]             = "  sem_getvalue                    ";
+  name[StatInt_stat]                     = "  stat                            ";
+  name[StatInt___xstat]                  = "  __xstat                         ";
+  name[StatInt_stat64]                   = "  stat64                          ";
+  name[StatInt___xstat64]                = "  __xstat64                       ";
+  name[StatInt_lstat]                    = "  lstat                           ";
+  name[StatInt___lxstat]                 = "  __lxstat                        ";
+  name[StatInt_lstat64]                  = "  lstat64                         ";
+  name[StatInt___lxstat64]               = "  __lxstat64                      ";
+  name[StatInt_fstat]                    = "  fstat                           ";
+  name[StatInt___fxstat]                 = "  __fxstat                        ";
+  name[StatInt_fstat64]                  = "  fstat64                         ";
+  name[StatInt___fxstat64]               = "  __fxstat64                      ";
   name[StatInt_open]                     = "  open                            ";
+  name[StatInt_open64]                   = "  open64                          ";
   name[StatInt_creat]                    = "  creat                           ";
+  name[StatInt_creat64]                  = "  creat64                         ";
   name[StatInt_dup]                      = "  dup                             ";
   name[StatInt_dup2]                     = "  dup2                            ";
   name[StatInt_dup3]                     = "  dup3                            ";
   name[StatInt_eventfd]                  = "  eventfd                         ";
+  name[StatInt_signalfd]                 = "  signalfd                        ";
+  name[StatInt_inotify_init]             = "  inotify_init                    ";
+  name[StatInt_inotify_init1]            = "  inotify_init1                   ";
   name[StatInt_socket]                   = "  socket                          ";
+  name[StatInt_socketpair]               = "  socketpair                      ";
   name[StatInt_connect]                  = "  connect                         ";
+  name[StatInt_bind]                     = "  bind                            ";
+  name[StatInt_listen]                   = "  listen                          ";
   name[StatInt_accept]                   = "  accept                          ";
   name[StatInt_accept4]                  = "  accept4                         ";
   name[StatInt_epoll_create]             = "  epoll_create                    ";
   name[StatInt_epoll_create1]            = "  epoll_create1                   ";
   name[StatInt_close]                    = "  close                           ";
+  name[StatInt___close]                  = "  __close                         ";
+  name[StatInt___res_iclose]             = "  __res_iclose                    ";
   name[StatInt_pipe]                     = "  pipe                            ";
   name[StatInt_pipe2]                    = "  pipe2                           ";
   name[StatInt_read]                     = "  read                            ";
+  name[StatInt_prctl]                    = "  prctl                           ";
   name[StatInt_pread]                    = "  pread                           ";
   name[StatInt_pread64]                  = "  pread64                         ";
   name[StatInt_readv]                    = "  readv                           ";
@@ -212,6 +235,8 @@
   name[StatInt_recvmsg]                  = "  recvmsg                         ";
   name[StatInt_unlink]                   = "  unlink                          ";
   name[StatInt_fopen]                    = "  fopen                           ";
+  name[StatInt_freopen]                  = "  freopen                         ";
+  name[StatInt_fclose]                   = "  fclose                          ";
   name[StatInt_fread]                    = "  fread                           ";
   name[StatInt_fwrite]                   = "  fwrite                          ";
   name[StatInt_puts]                     = "  puts                            ";
@@ -225,6 +250,29 @@
   name[StatInt_usleep]                   = "  usleep                          ";
   name[StatInt_nanosleep]                = "  nanosleep                       ";
   name[StatInt_gettimeofday]             = "  gettimeofday                    ";
+  name[StatInt_fork]                     = "  fork                            ";
+  name[StatInt_vscanf]                   = "  vscanf                          ";
+  name[StatInt_vsscanf]                  = "  vsscanf                         ";
+  name[StatInt_vfscanf]                  = "  vfscanf                         ";
+  name[StatInt_scanf]                    = "  scanf                           ";
+  name[StatInt_sscanf]                   = "  sscanf                          ";
+  name[StatInt_fscanf]                   = "  fscanf                          ";
+  name[StatInt___isoc99_vscanf]          = "  __isoc99_vscanf                 ";
+  name[StatInt___isoc99_vsscanf]         = "  __isoc99_vsscanf                ";
+  name[StatInt___isoc99_vfscanf]         = "  __isoc99_vfscanf                ";
+  name[StatInt___isoc99_scanf]           = "  __isoc99_scanf                  ";
+  name[StatInt___isoc99_sscanf]          = "  __isoc99_sscanf                 ";
+  name[StatInt___isoc99_fscanf]          = "  __isoc99_fscanf                 ";
+  name[StatInt_on_exit]                  = "  on_exit                         ";
+  name[StatInt___cxa_atexit]             = "  __cxa_atexit                    ";
+  name[StatInt_localtime]                = "  localtime                       ";
+  name[StatInt_localtime_r]              = "  localtime_r                     ";
+  name[StatInt_gmtime]                   = "  gmtime                          ";
+  name[StatInt_gmtime_r]                 = "  gmtime_r                        ";
+  name[StatInt_ctime]                    = "  ctime                           ";
+  name[StatInt_ctime_r]                  = "  ctime_r                         ";
+  name[StatInt_asctime]                  = "  asctime                         ";
+  name[StatInt_asctime_r]                = "  asctime_r                       ";
 
   name[StatAnnotation]                   = "Dynamic annotations               ";
   name[StatAnnotateHappensBefore]        = "  HappensBefore                   ";
@@ -269,6 +317,8 @@
   name[StatMtxAtExit]                    = "  Atexit                          ";
   name[StatMtxAnnotations]               = "  Annotations                     ";
   name[StatMtxMBlock]                    = "  MBlock                          ";
+  name[StatMtxJavaMBlock]                = "  JavaMBlock                      ";
+  name[StatMtxFD]                        = "  FD                              ";
 
   Printf("Statistics:\n");
   for (int i = 0; i < StatCnt; i++)
diff --git a/lib/tsan/rtl/tsan_stat.h b/lib/tsan/rtl/tsan_stat.h
index b144ba7..e4362b0 100644
--- a/lib/tsan/rtl/tsan_stat.h
+++ b/lib/tsan/rtl/tsan_stat.h
@@ -102,6 +102,7 @@
   StatInt_realloc,
   StatInt_free,
   StatInt_cfree,
+  StatInt_malloc_usable_size,
   StatInt_mmap,
   StatInt_mmap64,
   StatInt_munmap,
@@ -176,22 +177,45 @@
   StatInt_sem_timedwait,
   StatInt_sem_post,
   StatInt_sem_getvalue,
+  StatInt_stat,
+  StatInt___xstat,
+  StatInt_stat64,
+  StatInt___xstat64,
+  StatInt_lstat,
+  StatInt___lxstat,
+  StatInt_lstat64,
+  StatInt___lxstat64,
+  StatInt_fstat,
+  StatInt___fxstat,
+  StatInt_fstat64,
+  StatInt___fxstat64,
   StatInt_open,
+  StatInt_open64,
   StatInt_creat,
+  StatInt_creat64,
   StatInt_dup,
   StatInt_dup2,
   StatInt_dup3,
   StatInt_eventfd,
+  StatInt_signalfd,
+  StatInt_inotify_init,
+  StatInt_inotify_init1,
   StatInt_socket,
+  StatInt_socketpair,
   StatInt_connect,
+  StatInt_bind,
+  StatInt_listen,
   StatInt_accept,
   StatInt_accept4,
   StatInt_epoll_create,
   StatInt_epoll_create1,
   StatInt_close,
+  StatInt___close,
+  StatInt___res_iclose,
   StatInt_pipe,
   StatInt_pipe2,
   StatInt_read,
+  StatInt_prctl,
   StatInt_pread,
   StatInt_pread64,
   StatInt_readv,
@@ -207,6 +231,8 @@
   StatInt_recvmsg,
   StatInt_unlink,
   StatInt_fopen,
+  StatInt_freopen,
+  StatInt_fclose,
   StatInt_fread,
   StatInt_fwrite,
   StatInt_puts,
@@ -224,6 +250,29 @@
   StatInt_usleep,
   StatInt_nanosleep,
   StatInt_gettimeofday,
+  StatInt_fork,
+  StatInt_vscanf,
+  StatInt_vsscanf,
+  StatInt_vfscanf,
+  StatInt_scanf,
+  StatInt_sscanf,
+  StatInt_fscanf,
+  StatInt___isoc99_vscanf,
+  StatInt___isoc99_vsscanf,
+  StatInt___isoc99_vfscanf,
+  StatInt___isoc99_scanf,
+  StatInt___isoc99_sscanf,
+  StatInt___isoc99_fscanf,
+  StatInt_on_exit,
+  StatInt___cxa_atexit,
+  StatInt_localtime,
+  StatInt_localtime_r,
+  StatInt_gmtime,
+  StatInt_gmtime_r,
+  StatInt_ctime,
+  StatInt_ctime_r,
+  StatInt_asctime,
+  StatInt_asctime_r,
 
   // Dynamic annotations.
   StatAnnotation,
@@ -271,6 +320,8 @@
   StatMtxAnnotations,
   StatMtxAtExit,
   StatMtxMBlock,
+  StatMtxJavaMBlock,
+  StatMtxFD,
 
   // This must be the last.
   StatCnt
diff --git a/lib/tsan/rtl/tsan_suppressions.cc b/lib/tsan/rtl/tsan_suppressions.cc
index 5316f6d..941c208 100644
--- a/lib/tsan/rtl/tsan_suppressions.cc
+++ b/lib/tsan/rtl/tsan_suppressions.cc
@@ -19,6 +19,13 @@
 #include "tsan_mman.h"
 #include "tsan_platform.h"
 
+// Can be overridden in frontend.
+#ifndef TSAN_GO
+extern "C" const char *WEAK __tsan_default_suppressions() {
+  return 0;  // default: no extra suppression text
+}
+#endif
+
 namespace __tsan {
 
 static Suppression *g_suppressions;
@@ -31,7 +38,7 @@
     internal_snprintf(tmp.data(), tmp.size(), "%s", filename);
   else
     internal_snprintf(tmp.data(), tmp.size(), "%s/%s", GetPwd(), filename);
-  fd_t fd = internal_open(tmp.data(), false);
+  fd_t fd = OpenFile(tmp.data(), false);
   if (fd == kInvalidFd) {
     Printf("ThreadSanitizer: failed to open suppressions file '%s'\n",
                tmp.data());
@@ -80,8 +87,7 @@
   return true;
 }
 
-Suppression *SuppressionParse(const char* supp) {
-  Suppression *head = 0;
+Suppression *SuppressionParse(Suppression *head, const char* supp) {
   const char *line = supp;
   while (line) {
     while (line[0] == ' ' || line[0] == '\t')
@@ -130,8 +136,12 @@
 }
 
 void InitializeSuppressions() {
-  char *supp = ReadFile(flags()->suppressions);
-  g_suppressions = SuppressionParse(supp);
+  const char *supp = ReadFile(flags()->suppressions);
+  g_suppressions = SuppressionParse(0, supp);  // start from an empty list
+#ifndef TSAN_GO
+  supp = __tsan_default_suppressions();  // weak hook; 0 unless overridden
+  g_suppressions = SuppressionParse(g_suppressions, supp);
+#endif
 }
 
 uptr IsSuppressed(ReportType typ, const ReportStack *stack) {
@@ -152,7 +162,8 @@
     for (Suppression *supp = g_suppressions; supp; supp = supp->next) {
       if (stype == supp->type &&
           (SuppressionMatch(supp->templ, frame->func) ||
-          SuppressionMatch(supp->templ, frame->file))) {
+           SuppressionMatch(supp->templ, frame->file) ||
+           SuppressionMatch(supp->templ, frame->module))) {
         DPrintf("ThreadSanitizer: matched suppression '%s'\n", supp->templ);
         return frame->pc;
       }
diff --git a/lib/tsan/rtl/tsan_suppressions.h b/lib/tsan/rtl/tsan_suppressions.h
index 61a4cca..c588316 100644
--- a/lib/tsan/rtl/tsan_suppressions.h
+++ b/lib/tsan/rtl/tsan_suppressions.h
@@ -35,7 +35,7 @@
   char *templ;
 };
 
-Suppression *SuppressionParse(const char* supp);
+Suppression *SuppressionParse(Suppression *head, const char* supp);
 bool SuppressionMatch(char *templ, const char *str);
 
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_symbolize.cc b/lib/tsan/rtl/tsan_symbolize.cc
index 48bee67..a58b958 100644
--- a/lib/tsan/rtl/tsan_symbolize.cc
+++ b/lib/tsan/rtl/tsan_symbolize.cc
@@ -18,9 +18,24 @@
 #include "sanitizer_common/sanitizer_symbolizer.h"
 #include "tsan_flags.h"
 #include "tsan_report.h"
+#include "tsan_rtl.h"
 
 namespace __tsan {
 
+struct ScopedInSymbolizer {  // RAII guard for ThreadState::in_symbolizer
+  ScopedInSymbolizer() {
+    ThreadState *thr = cur_thread();
+    CHECK(!thr->in_symbolizer);  // the flag must not already be set
+    thr->in_symbolizer = true;
+  }
+
+  ~ScopedInSymbolizer() {
+    ThreadState *thr = cur_thread();
+    CHECK(thr->in_symbolizer);  // the flag must still be set on scope exit
+    thr->in_symbolizer = false;
+  }
+};
+
 ReportStack *NewReportStackEntry(uptr addr) {
   ReportStack *ent = (ReportStack*)internal_alloc(MBlockReportStack,
                                                   sizeof(ReportStack));
@@ -29,21 +44,24 @@
   return ent;
 }
 
+// Returns a copy (made with internal_strdup) of the part of |module| after
+// its last '/', or of all of |module| if it has no '/'. Returns 0 for 0.
+// Dropping the directory part makes the output shorter.
+static char *StripModuleName(const char *module) {
+  if (module == 0)
+    return 0;
+  const char *base_name = module;
+  if (const char *last_slash = internal_strrchr(module, '/'))
+    base_name = last_slash + 1;
+  return internal_strdup(base_name);
+}
+
 static ReportStack *NewReportStackEntry(const AddressInfo &info) {
   ReportStack *ent = NewReportStackEntry(info.address);
-  if (info.module) {
-    // Strip module path to make output shorter.
-    const char *short_module_name = internal_strrchr(info.module, '/');
-    if (short_module_name)
-      short_module_name += 1;
-    else
-      short_module_name = info.module;
-    ent->module = internal_strdup(short_module_name);
-  }
+  ent->module = StripModuleName(info.module);
   ent->offset = info.module_offset;
-  if (info.function) {
+  if (info.function)
     ent->func = internal_strdup(info.function);
-  }
   if (info.file)
     ent->file = internal_strdup(info.file);
   ent->line = info.line;
@@ -52,40 +70,50 @@
 }
 
 ReportStack *SymbolizeCode(uptr addr) {
-  if (flags()->external_symbolizer_path[0]) {
-    static const uptr kMaxAddrFrames = 16;
-    InternalScopedBuffer<AddressInfo> addr_frames(kMaxAddrFrames);
-    for (uptr i = 0; i < kMaxAddrFrames; i++)
-      new(&addr_frames[i]) AddressInfo();
-    uptr addr_frames_num = __sanitizer::SymbolizeCode(addr, addr_frames.data(),
-                                                      kMaxAddrFrames);
-    if (addr_frames_num == 0)
-      return NewReportStackEntry(addr);
-    ReportStack *top = 0;
-    ReportStack *bottom = 0;
-    for (uptr i = 0; i < addr_frames_num; i++) {
-      ReportStack *cur_entry = NewReportStackEntry(addr_frames[i]);
-      CHECK(cur_entry);
-      addr_frames[i].Clear();
-      if (i == 0)
-        top = cur_entry;
-      else
-        bottom->next = cur_entry;
-      bottom = cur_entry;
-    }
-    return top;
+  if (!IsSymbolizerAvailable())
+    return SymbolizeCodeAddr2Line(addr);
+  ScopedInSymbolizer in_symbolizer;
+  static const uptr kMaxAddrFrames = 16;
+  InternalScopedBuffer<AddressInfo> addr_frames(kMaxAddrFrames);
+  for (uptr i = 0; i < kMaxAddrFrames; i++)
+    new(&addr_frames[i]) AddressInfo();
+  uptr addr_frames_num = __sanitizer::SymbolizeCode(addr, addr_frames.data(),
+                                                    kMaxAddrFrames);
+  if (addr_frames_num == 0)
+    return NewReportStackEntry(addr);
+  ReportStack *top = 0;
+  ReportStack *bottom = 0;
+  for (uptr i = 0; i < addr_frames_num; i++) {
+    ReportStack *cur_entry = NewReportStackEntry(addr_frames[i]);
+    CHECK(cur_entry);
+    addr_frames[i].Clear();
+    if (i == 0)
+      top = cur_entry;
+    else
+      bottom->next = cur_entry;
+    bottom = cur_entry;
   }
-  return SymbolizeCodeAddr2Line(addr);
+  return top;
 }
 
-ReportStack *SymbolizeData(uptr addr) {
-  if (flags()->external_symbolizer_path[0]) {
-    AddressInfo frame;
-    if (!__sanitizer::SymbolizeData(addr, &frame))
-      return 0;
-    return NewReportStackEntry(frame);
-  }
-  return SymbolizeDataAddr2Line(addr);
+ReportLocation *SymbolizeData(uptr addr) {
+  if (!IsSymbolizerAvailable())
+    return 0;
+  ScopedInSymbolizer in_symbolizer;
+  DataInfo info;
+  if (!__sanitizer::SymbolizeData(addr, &info))
+    return 0;
+  ReportLocation *ent = (ReportLocation*)internal_alloc(MBlockReportStack,
+                                                        sizeof(ReportLocation));
+  internal_memset(ent, 0, sizeof(*ent));
+  ent->type = ReportLocationGlobal;
+  ent->module = StripModuleName(info.module);
+  ent->offset = info.module_offset;
+  if (info.name)
+    ent->name = internal_strdup(info.name);
+  ent->addr = info.start;
+  ent->size = info.size;
+  return ent;
 }
 
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_symbolize.h b/lib/tsan/rtl/tsan_symbolize.h
index 115339b..2919304 100644
--- a/lib/tsan/rtl/tsan_symbolize.h
+++ b/lib/tsan/rtl/tsan_symbolize.h
@@ -19,10 +19,9 @@
 namespace __tsan {
 
 ReportStack *SymbolizeCode(uptr addr);
-ReportStack *SymbolizeData(uptr addr);
+ReportLocation *SymbolizeData(uptr addr);
 
 ReportStack *SymbolizeCodeAddr2Line(uptr addr);
-ReportStack *SymbolizeDataAddr2Line(uptr addr);
 
 ReportStack *NewReportStackEntry(uptr addr);
 
diff --git a/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc b/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc
index fc7144e..76926e2 100644
--- a/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc
+++ b/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc
@@ -104,11 +104,11 @@
   m->base = (uptr)info->dlpi_addr;
   m->inp_fd = -1;
   m->out_fd = -1;
-  DPrintf("Module %s %zx\n", m->name, m->base);
+  DPrintf2("Module %s %zx\n", m->name, m->base);
   for (int i = 0; i < info->dlpi_phnum; i++) {
     const Elf64_Phdr *s = &info->dlpi_phdr[i];
-    DPrintf("  Section p_type=%zx p_offset=%zx p_vaddr=%zx p_paddr=%zx"
-            " p_filesz=%zx p_memsz=%zx p_flags=%zx p_align=%zx\n",
+    DPrintf2("  Section p_type=%zx p_offset=%zx p_vaddr=%zx p_paddr=%zx"
+        " p_filesz=%zx p_memsz=%zx p_flags=%zx p_align=%zx\n",
             (uptr)s->p_type, (uptr)s->p_offset, (uptr)s->p_vaddr,
             (uptr)s->p_paddr, (uptr)s->p_filesz, (uptr)s->p_memsz,
             (uptr)s->p_flags, (uptr)s->p_align);
@@ -121,7 +121,7 @@
     sec->end = sec->base + s->p_memsz;
     sec->next = ctx->sections;
     ctx->sections = sec;
-    DPrintf("  Section %zx-%zx\n", sec->base, sec->end);
+    DPrintf2("  Section %zx-%zx\n", sec->base, sec->end);
   }
   return 0;
 }
diff --git a/lib/tsan/rtl/tsan_sync.cc b/lib/tsan/rtl/tsan_sync.cc
index 38ecc6e..b25346e 100644
--- a/lib/tsan/rtl/tsan_sync.cc
+++ b/lib/tsan/rtl/tsan_sync.cc
@@ -57,9 +57,29 @@
   return GetAndLock(0, 0, addr, write_lock, false);
 }
 
+SyncVar* SyncTab::Create(ThreadState *thr, uptr pc, uptr addr) {
+  StatInc(thr, StatSyncCreated);
+  void *mem = internal_alloc(MBlockSync, sizeof(SyncVar));
+  const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
+  SyncVar *res = new(mem) SyncVar(addr, uid);
+#ifndef TSAN_GO
+  res->creation_stack.ObtainCurrent(thr, pc);
+#endif
+  return res;
+}
+
 SyncVar* SyncTab::GetAndLock(ThreadState *thr, uptr pc,
                              uptr addr, bool write_lock, bool create) {
 #ifndef TSAN_GO
+  {  // NOLINT
+    SyncVar *res = GetJavaSync(thr, pc, addr, write_lock, create);
+    if (res)
+      return res;
+  }
+
+  // Here we ask only PrimaryAllocator, because
+  // SecondaryAllocator::PointerIsMine() is slow and we have fallback on
+  // the hashmap anyway.
   if (PrimaryAllocator::PointerIsMine((void*)addr)) {
     MBlock *b = user_mblock(thr, (void*)addr);
     Lock l(&b->mtx);
@@ -71,11 +91,7 @@
     if (res == 0) {
       if (!create)
         return 0;
-      StatInc(thr, StatSyncCreated);
-      void *mem = internal_alloc(MBlockSync, sizeof(SyncVar));
-      const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
-      res = new(mem) SyncVar(addr, uid);
-      res->creation_stack.ObtainCurrent(thr, pc);
+      res = Create(thr, pc, addr);
       res->next = b->head;
       b->head = res;
     }
@@ -110,13 +126,7 @@
         break;
     }
     if (res == 0) {
-      StatInc(thr, StatSyncCreated);
-      void *mem = internal_alloc(MBlockSync, sizeof(SyncVar));
-      const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
-      res = new(mem) SyncVar(addr, uid);
-#ifndef TSAN_GO
-      res->creation_stack.ObtainCurrent(thr, pc);
-#endif
+      res = Create(thr, pc, addr);
       res->next = p->val;
       p->val = res;
     }
@@ -130,6 +140,11 @@
 
 SyncVar* SyncTab::GetAndRemove(ThreadState *thr, uptr pc, uptr addr) {
 #ifndef TSAN_GO
+  {  // NOLINT
+    SyncVar *res = GetAndRemoveJavaSync(thr, pc, addr);
+    if (res)
+      return res;
+  }
   if (PrimaryAllocator::PointerIsMine((void*)addr)) {
     MBlock *b = user_mblock(thr, (void*)addr);
     SyncVar *res = 0;
diff --git a/lib/tsan/rtl/tsan_sync.h b/lib/tsan/rtl/tsan_sync.h
index 34ea55b..77749e2 100644
--- a/lib/tsan/rtl/tsan_sync.h
+++ b/lib/tsan/rtl/tsan_sync.h
@@ -55,7 +55,7 @@
   static const int kInvalidTid = -1;
 
   Mutex mtx;
-  const uptr addr;
+  uptr addr;
   const u64 uid;  // Globally unique id.
   SyncClock clock;
   SyncClock read_clock;  // Used for rw mutexes only.
@@ -96,6 +96,8 @@
   // If the SyncVar does not exist, returns 0.
   SyncVar* GetAndRemove(ThreadState *thr, uptr pc, uptr addr);
 
+  SyncVar* Create(ThreadState *thr, uptr pc, uptr addr);
+
   uptr GetMemoryConsumption(uptr *nsync);
 
  private:
diff --git a/lib/tsan/rtl/tsan_update_shadow_word_inl.h b/lib/tsan/rtl/tsan_update_shadow_word_inl.h
index 2c43555..e7c036c 100644
--- a/lib/tsan/rtl/tsan_update_shadow_word_inl.h
+++ b/lib/tsan/rtl/tsan_update_shadow_word_inl.h
@@ -34,7 +34,7 @@
     if (Shadow::TidsAreEqual(old, cur)) {
       StatInc(thr, StatShadowSameThread);
       if (OldIsInSameSynchEpoch(old, thr)) {
-        if (OldIsRWNotWeaker(old, kAccessIsWrite)) {
+        if (old.IsRWNotWeaker(kAccessIsWrite, kIsAtomic)) {
           // found a slot that holds effectively the same info
           // (that is, same tid, same sync epoch and same size)
           StatInc(thr, StatMopSame);
@@ -43,7 +43,7 @@
         StoreIfNotYetStored(sp, &store_word);
         break;
       }
-      if (OldIsRWWeakerOrEqual(old, kAccessIsWrite))
+      if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))
         StoreIfNotYetStored(sp, &store_word);
       break;
     }
@@ -52,25 +52,23 @@
       StoreIfNotYetStored(sp, &store_word);
       break;
     }
-    if (BothReads(old, kAccessIsWrite))
+    if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))
       break;
     goto RACE;
   }
-
   // Do the memory access intersect?
-  if (Shadow::TwoRangesIntersect(old, cur, kAccessSize)) {
+  // In Go all memory accesses are 1 byte, so there can be no intersections.
+  if (kCppMode && Shadow::TwoRangesIntersect(old, cur, kAccessSize)) {
     StatInc(thr, StatShadowIntersect);
     if (Shadow::TidsAreEqual(old, cur)) {
       StatInc(thr, StatShadowSameThread);
       break;
     }
     StatInc(thr, StatShadowAnotherThread);
+    if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))
+      break;
     if (HappensBefore(old, thr))
       break;
-
-    if (BothReads(old, kAccessIsWrite))
-      break;
-
     goto RACE;
   }
   // The accesses do not intersect.
diff --git a/lib/tsan/rtl/tsan_vector.h b/lib/tsan/rtl/tsan_vector.h
index d41063d..64328d0 100644
--- a/lib/tsan/rtl/tsan_vector.h
+++ b/lib/tsan/rtl/tsan_vector.h
@@ -105,6 +105,6 @@
   Vector(const Vector&);
   void operator=(const Vector&);
 };
-}
+}  // namespace __tsan
 
 #endif  // #ifndef TSAN_VECTOR_H
diff --git a/lib/tsan/tests/CMakeLists.txt b/lib/tsan/tests/CMakeLists.txt
index d452f60..0fcc6b2 100644
--- a/lib/tsan/tests/CMakeLists.txt
+++ b/lib/tsan/tests/CMakeLists.txt
@@ -6,7 +6,7 @@
 function(add_tsan_unittest testname)
   # Build unit tests only on 64-bit Linux.
   if(UNIX AND NOT APPLE
-      AND CAN_TARGET_X86_64
+      AND CAN_TARGET_x86_64
       AND CMAKE_SIZEOF_VOID_P EQUAL 8
       AND NOT LLVM_BUILD_32_BITS)
     add_unittest(TsanUnitTests ${testname} ${ARGN})
diff --git a/lib/tsan/tests/rtl/tsan_test_util_linux.cc b/lib/tsan/tests/rtl/tsan_test_util_linux.cc
index dce8db9..a260148 100644
--- a/lib/tsan/tests/rtl/tsan_test_util_linux.cc
+++ b/lib/tsan/tests/rtl/tsan_test_util_linux.cc
@@ -73,7 +73,7 @@
   expect_report_reported = true;
   return true;
 }
-}
+}  // namespace __tsan
 
 static void* allocate_addr(int size, int offset_from_aligned = 0) {
   static uintptr_t foo;
diff --git a/lib/tsan/tests/unit/tsan_mman_test.cc b/lib/tsan/tests/unit/tsan_mman_test.cc
index 1a9a88f..ecbe874 100644
--- a/lib/tsan/tests/unit/tsan_mman_test.cc
+++ b/lib/tsan/tests/unit/tsan_mman_test.cc
@@ -10,10 +10,21 @@
 // This file is a part of ThreadSanitizer (TSan), a race detector.
 //
 //===----------------------------------------------------------------------===//
+#include <limits>
 #include "tsan_mman.h"
 #include "tsan_rtl.h"
 #include "gtest/gtest.h"
 
+extern "C" {
+uptr __tsan_get_current_allocated_bytes();
+uptr __tsan_get_heap_size();
+uptr __tsan_get_free_bytes();
+uptr __tsan_get_unmapped_bytes();
+uptr __tsan_get_estimated_allocated_size(uptr size);
+bool __tsan_get_ownership(void *p);
+uptr __tsan_get_allocated_size(void *p);
+}
+
 namespace __tsan {
 
 TEST(Mman, Internal) {
@@ -106,4 +117,55 @@
   }
 }
 
+TEST(Mman, UsableSize) {
+  ScopedInRtl in_rtl;
+  ThreadState *thr = cur_thread();
+  uptr pc = 0;
+  char *p = (char*)user_alloc(thr, pc, 10);
+  char *p2 = (char*)user_alloc(thr, pc, 20);
+  EXPECT_EQ(0U, user_alloc_usable_size(thr, pc, NULL));
+  EXPECT_EQ(10U, user_alloc_usable_size(thr, pc, p));
+  EXPECT_EQ(20U, user_alloc_usable_size(thr, pc, p2));
+  user_free(thr, pc, p);
+  user_free(thr, pc, p2);
+}
+
+TEST(Mman, Stats) {
+  ScopedInRtl in_rtl;
+  ThreadState *thr = cur_thread();
+
+  uptr alloc0 = __tsan_get_current_allocated_bytes();
+  uptr heap0 = __tsan_get_heap_size();
+  uptr free0 = __tsan_get_free_bytes();
+  uptr unmapped0 = __tsan_get_unmapped_bytes();
+
+  EXPECT_EQ(__tsan_get_estimated_allocated_size(10), (uptr)10);
+  EXPECT_EQ(__tsan_get_estimated_allocated_size(20), (uptr)20);
+  EXPECT_EQ(__tsan_get_estimated_allocated_size(100), (uptr)100);
+
+  char *p = (char*)user_alloc(thr, 0, 10);
+  EXPECT_EQ(__tsan_get_ownership(p), true);
+  EXPECT_EQ(__tsan_get_allocated_size(p), (uptr)10);
+
+  EXPECT_EQ(__tsan_get_current_allocated_bytes(), alloc0 + 16);
+  EXPECT_GE(__tsan_get_heap_size(), heap0);
+  EXPECT_EQ(__tsan_get_free_bytes(), free0);
+  EXPECT_EQ(__tsan_get_unmapped_bytes(), unmapped0);
+
+  user_free(thr, 0, p);
+
+  EXPECT_EQ(__tsan_get_current_allocated_bytes(), alloc0);
+  EXPECT_GE(__tsan_get_heap_size(), heap0);
+  EXPECT_EQ(__tsan_get_free_bytes(), free0);
+  EXPECT_EQ(__tsan_get_unmapped_bytes(), unmapped0);
+}
+
+TEST(Mman, CallocOverflow) {
+  size_t kArraySize = 4096;
+  volatile size_t kMaxSizeT = std::numeric_limits<size_t>::max();
+  volatile size_t kArraySize2 = kMaxSizeT / kArraySize + 10;
+  volatile void *p = calloc(kArraySize, kArraySize2);  // Should return 0.
+  EXPECT_EQ(0L, p);
+}
+
 }  // namespace __tsan
diff --git a/lib/tsan/tests/unit/tsan_platform_test.cc b/lib/tsan/tests/unit/tsan_platform_test.cc
index b43dbb4..733cc54 100644
--- a/lib/tsan/tests/unit/tsan_platform_test.cc
+++ b/lib/tsan/tests/unit/tsan_platform_test.cc
@@ -61,29 +61,4 @@
   pthread_join(t, 0);
 }
 
-TEST(Platform, FileOps) {
-  const char *str1 = "qwerty";
-  uptr len1 = internal_strlen(str1);
-  const char *str2 = "zxcv";
-  uptr len2 = internal_strlen(str2);
-
-  fd_t fd = internal_open("./tsan_test.tmp", true);
-  EXPECT_NE(fd, kInvalidFd);
-  EXPECT_EQ(len1, internal_write(fd, str1, len1));
-  EXPECT_EQ(len2, internal_write(fd, str2, len2));
-  internal_close(fd);
-
-  fd = internal_open("./tsan_test.tmp", false);
-  EXPECT_NE(fd, kInvalidFd);
-  EXPECT_EQ(len1 + len2, internal_filesize(fd));
-  char buf[64] = {};
-  EXPECT_EQ(len1, internal_read(fd, buf, len1));
-  EXPECT_EQ(0, internal_memcmp(buf, str1, len1));
-  EXPECT_EQ((char)0, buf[len1 + 1]);
-  internal_memset(buf, 0, len1);
-  EXPECT_EQ(len2, internal_read(fd, buf, len2));
-  EXPECT_EQ(0, internal_memcmp(buf, str2, len2));
-  internal_close(fd);
-}
-
 }  // namespace __tsan
diff --git a/lib/tsan/tests/unit/tsan_shadow_test.cc b/lib/tsan/tests/unit/tsan_shadow_test.cc
index fa9c982..17b1797 100644
--- a/lib/tsan/tests/unit/tsan_shadow_test.cc
+++ b/lib/tsan/tests/unit/tsan_shadow_test.cc
@@ -25,7 +25,7 @@
   EXPECT_EQ(s.GetHistorySize(), 0);
   EXPECT_EQ(s.addr0(), (u64)0);
   EXPECT_EQ(s.size(), (u64)1);
-  EXPECT_EQ(s.is_write(), false);
+  EXPECT_EQ(s.IsWrite(), true);
 
   s.IncrementEpoch();
   EXPECT_EQ(s.epoch(), (u64)23);
diff --git a/lib/tsan/tests/unit/tsan_suppressions_test.cc b/lib/tsan/tests/unit/tsan_suppressions_test.cc
index e1e0c12..decfa32 100644
--- a/lib/tsan/tests/unit/tsan_suppressions_test.cc
+++ b/lib/tsan/tests/unit/tsan_suppressions_test.cc
@@ -20,7 +20,7 @@
 
 TEST(Suppressions, Parse) {
   ScopedInRtl in_rtl;
-  Suppression *supp0 = SuppressionParse(
+  Suppression *supp0 = SuppressionParse(0,
     "race:foo\n"
     " 	race:bar\n"  // NOLINT
     "race:baz	 \n"  // NOLINT
@@ -45,7 +45,7 @@
 
 TEST(Suppressions, Parse2) {
   ScopedInRtl in_rtl;
-  Suppression *supp0 = SuppressionParse(
+  Suppression *supp0 = SuppressionParse(0,
     "  	# first line comment\n"  // NOLINT
     " 	race:bar 	\n"  // NOLINT
     "race:baz* *baz\n"
@@ -64,7 +64,7 @@
 
 TEST(Suppressions, Parse3) {
   ScopedInRtl in_rtl;
-  Suppression *supp0 = SuppressionParse(
+  Suppression *supp0 = SuppressionParse(0,
     "# last suppression w/o line-feed\n"
     "race:foo\n"
     "race:bar"
@@ -81,7 +81,7 @@
 
 TEST(Suppressions, ParseType) {
   ScopedInRtl in_rtl;
-  Suppression *supp0 = SuppressionParse(
+  Suppression *supp0 = SuppressionParse(0,
     "race:foo\n"
     "thread:bar\n"
     "mutex:baz\n"
diff --git a/lib/ubsan/CMakeLists.txt b/lib/ubsan/CMakeLists.txt
index b16983d..b549153 100644
--- a/lib/ubsan/CMakeLists.txt
+++ b/lib/ubsan/CMakeLists.txt
@@ -12,46 +12,28 @@
 
 set(UBSAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
 
+filter_available_targets(UBSAN_SUPPORTED_ARCH
+  x86_64 i386)
+
 set(UBSAN_RUNTIME_LIBRARIES)
 
 if(APPLE)
   # Build universal binary on APPLE.
-  add_library(clang_rt.ubsan_osx STATIC
-    ${UBSAN_SOURCES}
-    $<TARGET_OBJECTS:RTSanitizerCommon.osx>
-    )
-  set_target_compile_flags(clang_rt.ubsan_osx ${UBSAN_CFLAGS})
-  filter_available_targets(UBSAN_TARGETS x86_64 i386)
-  set_target_properties(clang_rt.ubsan_osx PROPERTIES
-    OSX_ARCHITECTURES "${UBSAN_TARGETS}")
+  add_compiler_rt_osx_static_runtime(clang_rt.ubsan_osx
+    ARCH ${UBSAN_SUPPORTED_ARCH}
+    SOURCES ${UBSAN_SOURCES}
+            $<TARGET_OBJECTS:RTSanitizerCommon.osx>
+    CFLAGS ${UBSAN_CFLAGS})
   list(APPEND UBSAN_RUNTIME_LIBRARIES clang_rt.ubsan_osx)
 else()
   # Build separate libraries for each target.
-  if(CAN_TARGET_X86_64)
-    add_library(clang_rt.ubsan-x86_64 STATIC
-      ${UBSAN_SOURCES}
-      $<TARGET_OBJECTS:RTSanitizerCommon.x86_64>
-      )
-    set_target_compile_flags(clang_rt.ubsan-x86_64
-      ${UBSAN_CFLAGS} ${TARGET_X86_64_CFLAGS}
-      )
-    list(APPEND UBSAN_RUNTIME_LIBRARIES clang_rt.ubsan-x86_64)
-  endif()
-  if(CAN_TARGET_I386)
-    add_library(clang_rt.ubsan-i386 STATIC
-      ${UBSAN_SOURCES}
-      $<TARGET_OBJECTS:RTSanitizerCommon.i386>
-      )
-    set_target_compile_flags(clang_rt.ubsan-i386
-      ${UBSAN_CFLAGS} ${TARGET_I386_CFLAGS}
-      )
-    list(APPEND UBSAN_RUNTIME_LIBRARIES clang_rt.ubsan-i386)
-  endif()
+  foreach(arch ${UBSAN_SUPPORTED_ARCH})
+    add_compiler_rt_static_runtime(clang_rt.ubsan-${arch} ${arch}
+      SOURCES ${UBSAN_SOURCES}
+              $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+      CFLAGS ${UBSAN_CFLAGS})
+    list(APPEND UBSAN_RUNTIME_LIBRARIES clang_rt.ubsan-${arch})
+  endforeach()
 endif()
 
-
-set_property(TARGET ${UBSAN_RUNTIME_LIBRARIES} APPEND PROPERTY
-  COMPILE_DEFINITIONS ${UBSAN_COMMON_DEFINITIONS})
-add_clang_compiler_rt_libraries(${UBSAN_RUNTIME_LIBRARIES})
-
 add_subdirectory(lit_tests)
diff --git a/lib/ubsan/lit_tests/CMakeLists.txt b/lib/ubsan/lit_tests/CMakeLists.txt
index 67d786d..565c523 100644
--- a/lib/ubsan/lit_tests/CMakeLists.txt
+++ b/lib/ubsan/lit_tests/CMakeLists.txt
@@ -3,9 +3,9 @@
   ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
   )
 
-if("${CMAKE_HOST_SYSTEM}" STREQUAL "${CMAKE_SYSTEM}")
-  # Run UBSan output tests only if we're not cross-compiling,
-  # and can be sure that clang would produce working binaries.
+if(COMPILER_RT_CAN_EXECUTE_TESTS)
+  # Run UBSan output tests only if we're sure that clang would produce
+  # working binaries.
   set(UBSAN_TEST_DEPS
     clang clang-headers FileCheck count not
     ${UBSAN_RUNTIME_LIBRARIES}
diff --git a/lib/ubsan/lit_tests/Integer/add-overflow.cpp b/lib/ubsan/lit_tests/Integer/add-overflow.cpp
index 4477638..8054352 100644
--- a/lib/ubsan/lit_tests/Integer/add-overflow.cpp
+++ b/lib/ubsan/lit_tests/Integer/add-overflow.cpp
@@ -13,7 +13,7 @@
 #ifdef ADD_I32
   int32_t k = 0x12345678;
   k += 0x789abcde;
-  // CHECK-ADD_I32: add-overflow.cpp:[[@LINE-1]]:5: runtime error: signed integer overflow: 305419896 + 2023406814 cannot be represented in type 'int32_t' (aka 'int')
+  // CHECK-ADD_I32: add-overflow.cpp:[[@LINE-1]]:5: runtime error: signed integer overflow: 305419896 + 2023406814 cannot be represented in type 'int'
 #endif
 
 #ifdef ADD_I64
diff --git a/lib/ubsan/lit_tests/Integer/negate-overflow.cpp b/lib/ubsan/lit_tests/Integer/negate-overflow.cpp
index e3beb6b..2ee4f10 100644
--- a/lib/ubsan/lit_tests/Integer/negate-overflow.cpp
+++ b/lib/ubsan/lit_tests/Integer/negate-overflow.cpp
@@ -1,7 +1,12 @@
-// RUN: %clang -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=CHECKS
+// RUN: %clang -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=CHECKU
 
 int main() {
+  // CHECKS-NOT: runtime error
+  // CHECKU: negate-overflow.cpp:[[@LINE+2]]:3: runtime error: negation of 2147483648 cannot be represented in type 'unsigned int'
+  // CHECKU-NOT: cast to an unsigned
   -unsigned(-0x7fffffff - 1); // ok
-  // CHECK: negate-overflow.cpp:6:10: runtime error: negation of -2147483648 cannot be represented in type 'int'; cast to an unsigned type to negate this value to itself
+  // CHECKS: negate-overflow.cpp:[[@LINE+2]]:10: runtime error: negation of -2147483648 cannot be represented in type 'int'; cast to an unsigned type to negate this value to itself
+  // CHECKU-NOT: runtime error
   return -(-0x7fffffff - 1);
 }
diff --git a/lib/ubsan/lit_tests/Integer/no-recover.cpp b/lib/ubsan/lit_tests/Integer/no-recover.cpp
index 08324bd..e200fea 100644
--- a/lib/ubsan/lit_tests/Integer/no-recover.cpp
+++ b/lib/ubsan/lit_tests/Integer/no-recover.cpp
@@ -1,5 +1,6 @@
-// RUN: %clang -fsanitize=unsigned-integer-overflow -Xclang -fsanitize-recover %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=RECOVER
-// RUN: %clang -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ABORT
+// RUN: %clang -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=RECOVER
+// RUN: %clang -fsanitize=unsigned-integer-overflow -fsanitize-recover %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=RECOVER
+// RUN: %clang -fsanitize=unsigned-integer-overflow -fno-sanitize-recover %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ABORT
 
 #include <stdint.h>
 
@@ -12,8 +13,8 @@
 
   uint32_t k = 0x87654321;
   k += 0xedcba987;
-  // RECOVER: no-recover.cpp:14:5: runtime error: unsigned integer overflow: 2271560481 + 3989547399 cannot be represented in type 'uint32_t' (aka 'unsigned int')
-  // ABORT: no-recover.cpp:14:5: runtime error: unsigned integer overflow: 2271560481 + 3989547399 cannot be represented in type 'uint32_t' (aka 'unsigned int')
+  // RECOVER: no-recover.cpp:[[@LINE-1]]:5: runtime error: unsigned integer overflow: 2271560481 + 3989547399 cannot be represented in type 'unsigned int'
+  // ABORT: no-recover.cpp:[[@LINE-2]]:5: runtime error: unsigned integer overflow: 2271560481 + 3989547399 cannot be represented in type 'unsigned int'
 
   (void)(uint64_t(10000000000000000000ull) + uint64_t(9000000000000000000ull));
   // RECOVER: 10000000000000000000 + 9000000000000000000 cannot be represented in type 'unsigned long'
diff --git a/lib/ubsan/lit_tests/Integer/uadd-overflow.cpp b/lib/ubsan/lit_tests/Integer/uadd-overflow.cpp
index d7b43d0..0edb100 100644
--- a/lib/ubsan/lit_tests/Integer/uadd-overflow.cpp
+++ b/lib/ubsan/lit_tests/Integer/uadd-overflow.cpp
@@ -13,7 +13,7 @@
 #ifdef ADD_I32
   uint32_t k = 0x87654321;
   k += 0xedcba987;
-  // CHECK-ADD_I32: uadd-overflow.cpp:[[@LINE-1]]:5: runtime error: unsigned integer overflow: 2271560481 + 3989547399 cannot be represented in type 'uint32_t' (aka 'unsigned int')
+  // CHECK-ADD_I32: uadd-overflow.cpp:[[@LINE-1]]:5: runtime error: unsigned integer overflow: 2271560481 + 3989547399 cannot be represented in type 'unsigned int'
 #endif
 
 #ifdef ADD_I64
diff --git a/lib/ubsan/lit_tests/Misc/bool.cpp b/lib/ubsan/lit_tests/Misc/bool.cpp
new file mode 100644
index 0000000..8fafe7e
--- /dev/null
+++ b/lib/ubsan/lit_tests/Misc/bool.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang -fsanitize=bool %s -O3 -o %T/bool.exe && %T/bool.exe 2>&1 | FileCheck %s
+
+unsigned char NotABool = 123;
+
+int main(int argc, char **argv) {
+  bool *p = (bool*)&NotABool;
+
+  // FIXME: Provide a better source location here.
+  // CHECK: bool.exe:0x{{[0-9a-f]*}}: runtime error: load of value 123, which is not a valid value for type 'bool'
+  return *p;
+}
diff --git a/lib/ubsan/lit_tests/Misc/bounds.cpp b/lib/ubsan/lit_tests/Misc/bounds.cpp
new file mode 100644
index 0000000..07b30d3
--- /dev/null
+++ b/lib/ubsan/lit_tests/Misc/bounds.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang -fsanitize=bounds %s -O3 -o %T/bounds.exe
+// RUN: %T/bounds.exe 0 0 0
+// RUN: %T/bounds.exe 1 2 3
+// RUN: %T/bounds.exe 2 0 0 2>&1 | FileCheck %s --check-prefix=CHECK-A-2
+// RUN: %T/bounds.exe 0 3 0 2>&1 | FileCheck %s --check-prefix=CHECK-B-3
+// RUN: %T/bounds.exe 0 0 4 2>&1 | FileCheck %s --check-prefix=CHECK-C-4
+
+int main(int argc, char **argv) {
+  int arr[2][3][4] = {};
+
+  return arr[argv[1][0] - '0'][argv[2][0] - '0'][argv[3][0] - '0'];
+  // CHECK-A-2: bounds.cpp:11:10: runtime error: index 2 out of bounds for type 'int [2][3][4]'
+  // CHECK-B-3: bounds.cpp:11:10: runtime error: index 3 out of bounds for type 'int [3][4]'
+  // CHECK-C-4: bounds.cpp:11:10: runtime error: index 4 out of bounds for type 'int [4]'
+}
diff --git a/lib/ubsan/lit_tests/Misc/deduplication.cpp b/lib/ubsan/lit_tests/Misc/deduplication.cpp
new file mode 100644
index 0000000..d9c909f
--- /dev/null
+++ b/lib/ubsan/lit_tests/Misc/deduplication.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang -fsanitize=undefined %s -o %t && %t 2>&1 | FileCheck %s
+// Verify deduplication works by ensuring only one diag is emitted.
+#include <limits.h>
+#include <stdio.h>
+
+void overflow() {
+  int i = INT_MIN;
+  --i;
+}
+
+int main() {
+  // CHECK: Start
+  fprintf(stderr, "Start\n");
+
+  // CHECK: runtime error
+  // CHECK-NOT: runtime error
+  // CHECK-NOT: runtime error
+  overflow();
+  overflow();
+  overflow();
+
+  // CHECK: End
+  fprintf(stderr, "End\n");
+  return 0;
+}
diff --git a/lib/ubsan/lit_tests/Misc/enum.cpp b/lib/ubsan/lit_tests/Misc/enum.cpp
new file mode 100644
index 0000000..b363fea
--- /dev/null
+++ b/lib/ubsan/lit_tests/Misc/enum.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang -fsanitize=enum %s -O3 -o %t && %t 2>&1 | FileCheck %s --check-prefix=CHECK-PLAIN
+// RUN: %clang -fsanitize=enum -std=c++11 -DE="class E" %s -O3 -o %t && %t
+// RUN: %clang -fsanitize=enum -std=c++11 -DE="class E : bool" %s -O3 -o %t && %t 2>&1 | FileCheck %s --check-prefix=CHECK-BOOL
+
+enum E { a = 1 } e;
+#undef E
+
+int main(int argc, char **argv) {
+  // memset(&e, 0xff, sizeof(e));
+  for (unsigned char *p = (unsigned char*)&e; p != (unsigned char*)(&e + 1); ++p)
+    *p = 0xff;
+
+  // CHECK-PLAIN: error: load of value 4294967295, which is not a valid value for type 'enum E'
+  // FIXME: Support marshalling and display of enum class values.
+  // CHECK-BOOL: error: load of value <unknown>, which is not a valid value for type 'enum E'
+  return (int)e != -1;
+}
diff --git a/lib/ubsan/lit_tests/TypeCheck/misaligned.cpp b/lib/ubsan/lit_tests/TypeCheck/misaligned.cpp
index af52bd1..3abacae 100644
--- a/lib/ubsan/lit_tests/TypeCheck/misaligned.cpp
+++ b/lib/ubsan/lit_tests/TypeCheck/misaligned.cpp
@@ -1,42 +1,73 @@
 // RUN: %clang -fsanitize=alignment %s -O3 -o %t
-// RUN: %t l0 && %t s0 && %t r0 && %t m0 && %t f0
-// RUN: %t l1 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD
+// RUN: %t l0 && %t s0 && %t r0 && %t m0 && %t f0 && %t n0
+// RUN: %t l1 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD --strict-whitespace
 // RUN: %t s1 2>&1 | FileCheck %s --check-prefix=CHECK-STORE
 // RUN: %t r1 2>&1 | FileCheck %s --check-prefix=CHECK-REFERENCE
 // RUN: %t m1 2>&1 | FileCheck %s --check-prefix=CHECK-MEMBER
 // RUN: %t f1 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN
+// RUN: %t n1 2>&1 | FileCheck %s --check-prefix=CHECK-NEW
+
+#include <new>
 
 struct S {
+  S() {}
   int f() { return 0; }
   int k;
 };
 
 int main(int, char **argv) {
-  char c[5] __attribute__((aligned(4))) = {};
+  char c[] __attribute__((aligned(8))) = { 0, 0, 0, 0, 1, 2, 3, 4, 5 };
 
   // Pointer value may be unspecified here, but behavior is not undefined.
-  int *p = (int*)&c[argv[1][1] - '0'];
+  int *p = (int*)&c[4 + argv[1][1] - '0'];
   S *s = (S*)p;
 
   (void)*p; // ok!
 
   switch (argv[1][0]) {
   case 'l':
-    // CHECK-LOAD: misaligned.cpp:26:12: runtime error: load of misaligned address 0x{{[0-9a-f]*}} for type 'int', which requires 4 byte alignment
-    return *p;
+    // CHECK-LOAD: misaligned.cpp:[[@LINE+4]]:12: runtime error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'int', which requires 4 byte alignment
+    // CHECK-LOAD-NEXT: [[PTR]]: note: pointer points here
+    // CHECK-LOAD-NEXT: {{^ 00 00 00 01 02 03 04  05}}
+    // CHECK-LOAD-NEXT: {{^             \^}}
+    return *p && 0;
+
   case 's':
-    // CHECK-STORE: misaligned.cpp:29:5: runtime error: store to misaligned address 0x{{[0-9a-f]*}} for type 'int', which requires 4 byte alignment
+    // CHECK-STORE: misaligned.cpp:[[@LINE+4]]:5: runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int', which requires 4 byte alignment
+    // CHECK-STORE-NEXT: [[PTR]]: note: pointer points here
+    // CHECK-STORE-NEXT: {{^ 00 00 00 01 02 03 04  05}}
+    // CHECK-STORE-NEXT: {{^             \^}}
     *p = 1;
     break;
+
   case 'r':
-    // CHECK-REFERENCE: misaligned.cpp:33:15: runtime error: reference binding to misaligned address 0x{{[0-9a-f]*}} for type 'int', which requires 4 byte alignment
+    // CHECK-REFERENCE: misaligned.cpp:[[@LINE+4]]:15: runtime error: reference binding to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int', which requires 4 byte alignment
+    // CHECK-REFERENCE-NEXT: [[PTR]]: note: pointer points here
+    // CHECK-REFERENCE-NEXT: {{^ 00 00 00 01 02 03 04  05}}
+    // CHECK-REFERENCE-NEXT: {{^             \^}}
     {int &r = *p;}
     break;
+
   case 'm':
-    // CHECK-MEMBER: misaligned.cpp:37:15: runtime error: member access within misaligned address 0x{{[0-9a-f]*}} for type 'S', which requires 4 byte alignment
-    return s->k;
+    // CHECK-MEMBER: misaligned.cpp:[[@LINE+4]]:15: runtime error: member access within misaligned address [[PTR:0x[0-9a-f]*]] for type 'S', which requires 4 byte alignment
+    // CHECK-MEMBER-NEXT: [[PTR]]: note: pointer points here
+    // CHECK-MEMBER-NEXT: {{^ 00 00 00 01 02 03 04  05}}
+    // CHECK-MEMBER-NEXT: {{^             \^}}
+    return s->k && 0;
+
   case 'f':
-    // CHECK-MEMFUN: misaligned.cpp:40:12: runtime error: member call on misaligned address 0x{{[0-9a-f]*}} for type 'S', which requires 4 byte alignment
-    return s->f();
+    // CHECK-MEMFUN: misaligned.cpp:[[@LINE+4]]:12: runtime error: member call on misaligned address [[PTR:0x[0-9a-f]*]] for type 'S', which requires 4 byte alignment
+    // CHECK-MEMFUN-NEXT: [[PTR]]: note: pointer points here
+    // CHECK-MEMFUN-NEXT: {{^ 00 00 00 01 02 03 04  05}}
+    // CHECK-MEMFUN-NEXT: {{^             \^}}
+    return s->f() && 0;
+
+  case 'n':
+    // FIXME: Provide a better source location here.
+    // CHECK-NEW: misaligned{{.*}}:0x{{[0-9a-f]*}}: runtime error: constructor call on misaligned address [[PTR:0x[0-9a-f]*]] for type 'S', which requires 4 byte alignment
+    // CHECK-NEW-NEXT: [[PTR]]: note: pointer points here
+    // CHECK-NEW-NEXT: {{^ 00 00 00 01 02 03 04  05}}
+    // CHECK-NEW-NEXT: {{^             \^}}
+    return (new (s) S)->k && 0;
   }
 }
diff --git a/lib/ubsan/lit_tests/TypeCheck/vptr.cpp b/lib/ubsan/lit_tests/TypeCheck/vptr.cpp
index 6533568..109e7a8 100644
--- a/lib/ubsan/lit_tests/TypeCheck/vptr.cpp
+++ b/lib/ubsan/lit_tests/TypeCheck/vptr.cpp
@@ -1,12 +1,15 @@
 // RUN: %clang -ccc-cxx -fsanitize=vptr %s -O3 -o %t
-// RUN: %t rT && %t mT && %t fT
-// RUN: %t rU && %t mU && %t fU
-// RUN: %t rS 2>&1 | FileCheck %s --check-prefix=CHECK-REFERENCE
-// RUN: %t mS 2>&1 | FileCheck %s --check-prefix=CHECK-MEMBER
-// RUN: %t fS 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN
-// RUN: %t rV 2>&1 | FileCheck %s --check-prefix=CHECK-REFERENCE
-// RUN: %t mV 2>&1 | FileCheck %s --check-prefix=CHECK-MEMBER
-// RUN: %t fV 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN
+// RUN: %t rT && %t mT && %t fT && %t cT
+// RUN: %t rU && %t mU && %t fU && %t cU
+// RUN: %t rS && %t rV && %t oV
+// RUN: %t mS 2>&1 | FileCheck %s --check-prefix=CHECK-MEMBER --strict-whitespace
+// RUN: %t fS 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN --strict-whitespace
+// RUN: %t cS 2>&1 | FileCheck %s --check-prefix=CHECK-DOWNCAST --strict-whitespace
+// RUN: %t mV 2>&1 | FileCheck %s --check-prefix=CHECK-MEMBER --strict-whitespace
+// RUN: %t fV 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN --strict-whitespace
+// RUN: %t cV 2>&1 | FileCheck %s --check-prefix=CHECK-DOWNCAST --strict-whitespace
+// RUN: %t oU 2>&1 | FileCheck %s --check-prefix=CHECK-OFFSET --strict-whitespace
+// RUN: %t m0 2>&1 | FileCheck %s --check-prefix=CHECK-NULL-MEMBER --strict-whitespace
 
 // FIXME: This test produces linker errors on Darwin.
 // XFAIL: darwin
@@ -47,7 +50,11 @@
   (void)((T&)u).S::v();
 
   T *p = 0;
+  char Buffer[sizeof(U)] = {};
   switch (argv[1][1]) {
+  case '0':
+    p = reinterpret_cast<T*>(Buffer);
+    break;
   case 'S':
     p = reinterpret_cast<T*>(new S);
     break;
@@ -64,14 +71,47 @@
 
   switch (argv[1][0]) {
   case 'r':
-    // CHECK-REFERENCE: vptr.cpp:[[@LINE+1]]:13: runtime error: reference binding to address 0x{{[0-9a-f]*}} which does not point to an object of type 'T'
+    // Binding a reference to storage of appropriate size and alignment is OK.
     {T &r = *p;}
     break;
+
   case 'm':
-    // CHECK-MEMBER: vptr.cpp:[[@LINE+1]]:15: runtime error: member access within address 0x{{[0-9a-f]*}} which does not point to an object of type 'T'
+    // CHECK-MEMBER: vptr.cpp:[[@LINE+5]]:15: runtime error: member access within address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
+    // CHECK-MEMBER-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:'S'|'U']]
+    // CHECK-MEMBER-NEXT: {{^ .. .. .. ..  .. .. .. .. .. .. .. ..  }}
+    // CHECK-MEMBER-NEXT: {{^              \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}}
+    // CHECK-MEMBER-NEXT: {{^              vptr for}} [[DYN_TYPE]]
     return p->b;
+
+    // CHECK-NULL-MEMBER: vptr.cpp:[[@LINE-2]]:15: runtime error: member access within address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
+    // CHECK-NULL-MEMBER-NEXT: [[PTR]]: note: object has invalid vptr
+    // CHECK-NULL-MEMBER-NEXT: {{^ .. .. .. ..  00 00 00 00 00 00 00 00  }}
+    // CHECK-NULL-MEMBER-NEXT: {{^              \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}}
+    // CHECK-NULL-MEMBER-NEXT: {{^              invalid vptr}}
+
   case 'f':
-    // CHECK-MEMFUN: vptr.cpp:[[@LINE+1]]:12: runtime error: member call on address 0x{{[0-9a-f]*}} which does not point to an object of type 'T'
+    // CHECK-MEMFUN: vptr.cpp:[[@LINE+5]]:12: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
+    // CHECK-MEMFUN-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:'S'|'U']]
+    // CHECK-MEMFUN-NEXT: {{^ .. .. .. ..  .. .. .. .. .. .. .. ..  }}
+    // CHECK-MEMFUN-NEXT: {{^              \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}}
+    // CHECK-MEMFUN-NEXT: {{^              vptr for}} [[DYN_TYPE]]
     return p->g();
+
+  case 'o':
+    // CHECK-OFFSET: vptr.cpp:[[@LINE+5]]:12: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'U'
+    // CHECK-OFFSET-NEXT: 0x{{[0-9a-f]*}}: note: object is base class subobject at offset {{8|16}} within object of type [[DYN_TYPE:'U']]
+    // CHECK-OFFSET-NEXT: {{^ .. .. .. ..  .. .. .. .. .. .. .. ..  .. .. .. .. .. .. .. ..  .. .. .. .. .. .. .. ..  }}
+    // CHECK-OFFSET-NEXT: {{^              \^                        (                         ~~~~~~~~~~~~)~~~~~~~~~~~ *$}}
+    // CHECK-OFFSET-NEXT: {{^                                       (                         )?vptr for}} 'T' base class of [[DYN_TYPE]]
+    return reinterpret_cast<U*>(p)->v() - 2;
+
+  case 'c':
+    // CHECK-DOWNCAST: vptr.cpp:[[@LINE+5]]:5: runtime error: downcast of address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
+    // CHECK-DOWNCAST-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:'S'|'U']]
+    // CHECK-DOWNCAST-NEXT: {{^ .. .. .. ..  .. .. .. .. .. .. .. ..  }}
+    // CHECK-DOWNCAST-NEXT: {{^              \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}}
+    // CHECK-DOWNCAST-NEXT: {{^              vptr for}} [[DYN_TYPE]]
+    static_cast<T*>(reinterpret_cast<S*>(p));
+    return 0;
   }
 }
diff --git a/lib/ubsan/ubsan_diag.cc b/lib/ubsan/ubsan_diag.cc
index 8a1af4b..0727ed7 100644
--- a/lib/ubsan/ubsan_diag.cc
+++ b/lib/ubsan/ubsan_diag.cc
@@ -14,10 +14,29 @@
 #include "ubsan_diag.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_report_decorator.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
 #include <stdio.h>
 
 using namespace __ubsan;
 
+Location __ubsan::getCallerLocation(uptr CallerLoc) {
+  // Without a caller PC there is nothing to symbolize: null location.
+  if (!CallerLoc)
+    return Location();
+  // Symbolize the PC that GetPreviousInstructionPc derives from CallerLoc.
+  const uptr PC = StackTrace::GetPreviousInstructionPc(CallerLoc);
+  AddressInfo Frame;
+  // No frame, or a missing/empty module name: fall back to the raw address.
+  const bool Symbolized = SymbolizeCode(PC, &Frame, 1);
+  if (!Symbolized || !Frame.module || !*Frame.module)
+    return Location(PC);
+  if (!Frame.function)
+    return ModuleLocation(Frame.module, Frame.module_offset);
+  return SourceLocation(Frame.file, Frame.line, Frame.column);
+}
+
 Diag &Diag::operator<<(const TypeDescriptor &V) {
   return AddArg(V.getTypeName());
 }
@@ -34,7 +53,7 @@
   return *this;
 }
 
-/// Hexadecimal printing for numbers too large for fprintf to handle directly.
+/// Hexadecimal printing for numbers too large for Printf to handle directly.
 static void PrintHex(UIntMax Val) {
 #if HAVE_INT128_T
   Printf("0x%08x%08x%08x%08x",
@@ -47,22 +66,33 @@
 #endif
 }
 
-Diag::~Diag() {
-  bool UseAnsiColor = PrintsToTty();
-  if (UseAnsiColor)
-    RawWrite("\033[1m");
-  if (Loc.isInvalid())
-    RawWrite("<unknown>:");
-  else {
-    Printf("%s:%d:", Loc.getFilename(), Loc.getLine());
-    if (Loc.getColumn())
-      Printf("%d:", Loc.getColumn());
+static void renderLocation(Location Loc) {
+  switch (Loc.getKind()) {
+  case Location::LK_Source: {
+    SourceLocation SLoc = Loc.getSourceLocation();
+    if (SLoc.isInvalid())
+      Printf("<unknown>:");
+    else {
+      Printf("%s:%d:", SLoc.getFilename(), SLoc.getLine());
+      if (SLoc.getColumn())
+        Printf("%d:", SLoc.getColumn());
+    }
+    break;
   }
-  if (UseAnsiColor)
-    RawWrite("\033[31m");
-  RawWrite(" runtime error: ");
-  if (UseAnsiColor)
-    RawWrite("\033[0;1m");
+  case Location::LK_Module:
+    Printf("%s:0x%zx:", Loc.getModuleLocation().getModuleName(),
+           Loc.getModuleLocation().getOffset());
+    break;
+  case Location::LK_Memory:
+    Printf("%p:", Loc.getMemoryLocation());
+    break;
+  case Location::LK_Null:
+    Printf("<unknown>:");
+    break;
+  }
+}
+
+static void renderText(const char *Message, const Diag::Arg *Args) {
   for (const char *Msg = Message; *Msg; ++Msg) {
     if (*Msg != '%') {
       char Buffer[64];
@@ -70,28 +100,32 @@
       for (I = 0; Msg[I] && Msg[I] != '%' && I != 63; ++I)
         Buffer[I] = Msg[I];
       Buffer[I] = '\0';
-      RawWrite(Buffer);
+      Printf(Buffer);
       Msg += I - 1;
     } else {
-      const Arg &A = Args[*++Msg - '0'];
+      const Diag::Arg &A = Args[*++Msg - '0'];
       switch (A.Kind) {
-      case AK_String:
+      case Diag::AK_String:
         Printf("%s", A.String);
         break;
-      case AK_SInt:
+      case Diag::AK_Mangled: {
+        Printf("'%s'", Demangle(A.String));
+        break;
+      }
+      case Diag::AK_SInt:
         // 'long long' is guaranteed to be at least 64 bits wide.
         if (A.SInt >= INT64_MIN && A.SInt <= INT64_MAX)
           Printf("%lld", (long long)A.SInt);
         else
           PrintHex(A.SInt);
         break;
-      case AK_UInt:
+      case Diag::AK_UInt:
         if (A.UInt <= UINT64_MAX)
           Printf("%llu", (unsigned long long)A.UInt);
         else
           PrintHex(A.UInt);
         break;
-      case AK_Float: {
+      case Diag::AK_Float: {
         // FIXME: Support floating-point formatting in sanitizer_common's
         //        printf, and stop using snprintf here.
         char Buffer[32];
@@ -99,13 +133,130 @@
         Printf("%s", Buffer);
         break;
       }
-      case AK_Pointer:
-        Printf("0x%zx", (uptr)A.Pointer);
+      case Diag::AK_Pointer:
+        Printf("%p", A.Pointer);
         break;
       }
     }
   }
-  RawWrite("\n");
-  if (UseAnsiColor)
-    Printf("\033[0m");
+}
+
+/// Return the range ending after Loc whose start is lowest, or null if none.
+static Range *upperBound(MemoryLocation Loc, Range *Ranges,
+                         unsigned NumRanges) {
+  Range *Earliest = 0;
+  // On equal starts the range seen first is kept (strict comparison below).
+  for (Range *R = Ranges, *E = Ranges + NumRanges; R != E; ++R)
+    if (R->getEnd().getMemoryLocation() > Loc &&
+        (!Earliest || R->getStart().getMemoryLocation() <
+                          Earliest->getStart().getMemoryLocation()))
+      Earliest = R;
+  return Earliest;
+}
+
+/// Render a hex dump of memory near Loc, with Loc and Ranges marked below.
+static void renderMemorySnippet(const __sanitizer::AnsiColorDecorator &Decor,
+                                MemoryLocation Loc,
+                                Range *Ranges, unsigned NumRanges,
+                                const Diag::Arg *Args) {
+  const unsigned BytesToShow = 32;  // Number of bytes printed in the dump.
+  const unsigned MinBytesNearLoc = 4;  // Initial margin on each side of Loc.
+
+  // Start with the 8 bytes surrounding Loc, widened to cover every range.
+  MemoryLocation Min = Loc - MinBytesNearLoc, Max = Loc + MinBytesNearLoc;
+  for (unsigned I = 0; I < NumRanges; ++I) {
+    Min = __sanitizer::Min(Ranges[I].getStart().getMemoryLocation(), Min);
+    Max = __sanitizer::Max(Ranges[I].getEnd().getMemoryLocation(), Max);
+  }
+
+  // If we have too many interesting bytes, prefer to show bytes after Loc.
+  if (Max - Min > BytesToShow)
+    Min = __sanitizer::Min(Max - BytesToShow, Loc - MinBytesNearLoc);
+  Max = Min + BytesToShow;  // The window is always exactly BytesToShow wide.
+
+  // Emit data: two hex digits per byte, extra space at 8-byte addresses.
+  for (uptr P = Min; P != Max; ++P) {
+    // FIXME: Check that the address is readable before printing it.
+    unsigned char C = *reinterpret_cast<const unsigned char*>(P);
+    Printf("%s%02x", (P % 8 == 0) ? "  " : " ", C);
+  }
+  Printf("\n");
+
+  // Emit highlights: '^' under Loc, '~' under bytes inside a range.
+  Printf(Decor.Green());
+  Range *InRange = upperBound(Min, Ranges, NumRanges);
+  for (uptr P = Min; P != Max; ++P) {
+    char Pad = ' ', Byte = ' ';
+    if (InRange && InRange->getEnd().getMemoryLocation() == P)
+      InRange = upperBound(P, Ranges, NumRanges);
+    if (!InRange && P > Loc)
+      break;  // No range left and Loc already marked: the line is done.
+    if (InRange && InRange->getStart().getMemoryLocation() < P)
+      Pad = '~';
+    if (InRange && InRange->getStart().getMemoryLocation() <= P)
+      Byte = '~';
+    char Buffer[] = { Pad, Pad, P == Loc ? '^' : Byte, Byte, 0 };
+    Printf((P % 8 == 0) ? Buffer : &Buffer[1]);
+  }
+  Printf("%s\n", Decor.Default());
+
+  // Go over the line again, and print names for the ranges.
+  InRange = 0;
+  unsigned Spaces = 0;
+  for (uptr P = Min; P != Max; ++P) {
+    if (!InRange || InRange->getEnd().getMemoryLocation() == P)
+      InRange = upperBound(P, Ranges, NumRanges);
+    if (!InRange)
+      break;
+
+    Spaces += (P % 8) == 0 ? 2 : 1;  // Separator width used by the data line.
+
+    if (InRange && InRange->getStart().getMemoryLocation() == P) {
+      while (Spaces--)
+        Printf(" ");
+      renderText(InRange->getText(), Args);
+      Printf("\n");
+      // FIXME: We only support naming one range for now!
+      break;
+    }
+
+    Spaces += 2;  // The two hex digits printed for this byte.
+  }
+
+  // FIXME: Print names for anything we can identify within the line:
+  //
+  //  * If we can identify the memory itself as belonging to a particular
+  //    global, stack variable, or dynamic allocation, then do so.
+  //
+  //  * If we have a pointer-size, pointer-aligned range highlighted,
+  //    determine whether the value of that range is a pointer to an
+  //    entity which we can name, and if so, print that name.
+  //
+  // This needs an external symbolizer, or (preferably) ASan instrumentation.
+}
+
+Diag::~Diag() {
+  // Output shape: "<location> <level tag> <message>\n", wrapped in the
+  // decorator's escape sequences.
+  __sanitizer::AnsiColorDecorator Colors(PrintsToTty());
+  Printf(Colors.Bold());
+  renderLocation(Loc);
+
+  // Level tag; the error tag ends with Default() followed by Bold().
+  if (Level == DL_Error)
+    Printf("%s runtime error: %s%s",
+           Colors.Red(), Colors.Default(), Colors.Bold());
+  else if (Level == DL_Note)
+    Printf("%s note: %s", Colors.Black(), Colors.Default());
+
+  renderText(Message, Args);
+  Printf("%s\n", Colors.Default());
+
+  // Only diagnostics located at a memory address get a memory snippet.
+  if (!Loc.isMemoryLocation())
+    return;
+
+  // getMemoryLocation() CHECKs the kind, which was verified just above.
+  const MemoryLocation Addr = Loc.getMemoryLocation();
+  renderMemorySnippet(Colors, Addr, Ranges, NumRanges, Args);
 }
diff --git a/lib/ubsan/ubsan_diag.h b/lib/ubsan/ubsan_diag.h
index 6424cee..16afffd 100644
--- a/lib/ubsan/ubsan_diag.h
+++ b/lib/ubsan/ubsan_diag.h
@@ -17,22 +17,118 @@
 
 namespace __ubsan {
 
+/// \brief A (module name, offset) pair identifying a spot in a loaded module.
+/// Used as a fallback when no SourceLocation can be produced for a location.
+class ModuleLocation {
+public:
+  ModuleLocation() : ModuleName(0), Offset(0) {}
+  ModuleLocation(const char *Name, uptr Off) : ModuleName(Name), Offset(Off) {}
+  const char *getModuleName() const { return this->ModuleName; }
+  uptr getOffset() const { return this->Offset; }
+
+private:
+  const char *ModuleName;
+  uptr Offset;
+};
+
+/// A location of some data within the program's address space.
+typedef uptr MemoryLocation;
+
+/// \brief Location at which a diagnostic can be emitted. Either a
+/// SourceLocation, a ModuleLocation, a MemoryLocation, or null (LK_Null).
+class Location {
+public:
+  enum LocationKind { LK_Null, LK_Source, LK_Module, LK_Memory };
+
+private:
+  LocationKind Kind;
+  // FIXME: In C++11, wrap these in an anonymous union.
+  SourceLocation SourceLoc;
+  ModuleLocation ModuleLoc;
+  MemoryLocation MemoryLoc;
+  // MemoryLoc is a plain integer with no default constructor, so every
+  // constructor below sets it: Locations are copied by value everywhere.
+public:
+  Location() : Kind(LK_Null), MemoryLoc(0) {}
+  Location(SourceLocation Loc) :
+    Kind(LK_Source), SourceLoc(Loc), MemoryLoc(0) {}
+  Location(ModuleLocation Loc) :
+    Kind(LK_Module), ModuleLoc(Loc), MemoryLoc(0) {}
+  Location(MemoryLocation Loc) :
+    Kind(LK_Memory), MemoryLoc(Loc) {}
+
+  LocationKind getKind() const { return Kind; }
+
+  bool isSourceLocation() const { return Kind == LK_Source; }
+  bool isModuleLocation() const { return Kind == LK_Module; }
+  bool isMemoryLocation() const { return Kind == LK_Memory; }
+  // Each accessor below CHECKs that the Location holds the requested kind.
+  SourceLocation getSourceLocation() const {
+    CHECK(isSourceLocation());
+    return SourceLoc;
+  }
+  ModuleLocation getModuleLocation() const {
+    CHECK(isModuleLocation());
+    return ModuleLoc;
+  }
+  MemoryLocation getMemoryLocation() const {
+    CHECK(isMemoryLocation());
+    return MemoryLoc;
+  }
+};
+
+/// Try to obtain a location for the caller. This might fail, and produce either
+/// an invalid location or a module location for the caller.
+Location getCallerLocation(uptr CallerLoc = GET_CALLER_PC());
+
+/// A diagnostic severity level.
+enum DiagLevel {
+  DL_Error, ///< An error.
+  DL_Note   ///< A note, attached to a prior diagnostic.
+};
+
+/// \brief A text label attached to a start/end pair of memory locations.
+class Range {
+public:
+  Range() : Start(), End(), Text(0) {}
+  Range(MemoryLocation Begin, MemoryLocation Finish, const char *Label)
+    : Start(Begin), End(Finish), Text(Label) {}
+  Location getStart() const { return this->Start; }
+  Location getEnd() const { return this->End; }
+  const char *getText() const { return this->Text; }
+private:
+  Location Start, End;
+  const char *Text;
+};
+
+/// \brief Tags a 'const char*' as a mangled C++ name (a strong typedef).
+class MangledName {
+  const char *Name;
+public:
+  MangledName(const char *Mangled) : Name(Mangled) {}
+  const char *getName() const { return this->Name; }
+};
+
 /// \brief Representation of an in-flight diagnostic.
 ///
 /// Temporary \c Diag instances are created by the handler routines to
 /// accumulate arguments for a diagnostic. The destructor emits the diagnostic
 /// message.
 class Diag {
-  /// The source location at which the problem occurred.
-  const SourceLocation &Loc;
+  /// The location at which the problem occurred.
+  Location Loc;
+
+  /// The diagnostic level.
+  DiagLevel Level;
 
   /// The message which will be emitted, with %0, %1, ... placeholders for
   /// arguments.
   const char *Message;
 
+public:
   /// Kinds of arguments, corresponding to members of \c Arg's union.
   enum ArgKind {
     AK_String, ///< A string argument, displayed as-is.
+    AK_Mangled,///< A C++ mangled name, demangled before display.
     AK_UInt,   ///< An unsigned integer argument.
     AK_SInt,   ///< A signed integer argument.
     AK_Float,  ///< A floating-point argument.
@@ -43,6 +139,7 @@
   struct Arg {
     Arg() {}
     Arg(const char *String) : Kind(AK_String), String(String) {}
+    Arg(MangledName MN) : Kind(AK_Mangled), String(MN.getName()) {}
     Arg(UIntMax UInt) : Kind(AK_UInt), UInt(UInt) {}
     Arg(SIntMax SInt) : Kind(AK_SInt), SInt(SInt) {}
     Arg(FloatMax Float) : Kind(AK_Float), Float(Float) {}
@@ -58,32 +155,46 @@
     };
   };
 
+private:
   static const unsigned MaxArgs = 5;
+  static const unsigned MaxRanges = 1;
 
   /// The arguments which have been added to this diagnostic so far.
   Arg Args[MaxArgs];
   unsigned NumArgs;
 
+  /// The ranges which have been added to this diagnostic so far.
+  Range Ranges[MaxRanges];
+  unsigned NumRanges;
+
   Diag &AddArg(Arg A) {
     CHECK(NumArgs != MaxArgs);
     Args[NumArgs++] = A;
     return *this;
   }
 
+  Diag &AddRange(Range A) {
+    CHECK(NumRanges != MaxRanges);
+    Ranges[NumRanges++] = A;
+    return *this;
+  }
+
   /// \c Diag objects are not copyable.
   Diag(const Diag &); // NOT IMPLEMENTED
   Diag &operator=(const Diag &);
 
 public:
-  Diag(const SourceLocation &Loc, const char *Message)
-    : Loc(Loc), Message(Message), NumArgs(0) {}
+  Diag(Location Loc, DiagLevel Level, const char *Message)
+    : Loc(Loc), Level(Level), Message(Message), NumArgs(0), NumRanges(0) {}
   ~Diag();
 
   Diag &operator<<(const char *Str) { return AddArg(Str); }
+  Diag &operator<<(MangledName MN) { return AddArg(MN); }
   Diag &operator<<(unsigned long long V) { return AddArg(UIntMax(V)); }
   Diag &operator<<(const void *V) { return AddArg(V); }
   Diag &operator<<(const TypeDescriptor &V);
   Diag &operator<<(const Value &V);
+  Diag &operator<<(const Range &R) { return AddRange(R); }
 };
 
 } // namespace __ubsan
diff --git a/lib/ubsan/ubsan_handlers.cc b/lib/ubsan/ubsan_handlers.cc
index 47f06e8..fa93b09 100644
--- a/lib/ubsan/ubsan_handlers.cc
+++ b/lib/ubsan/ubsan_handlers.cc
@@ -22,38 +22,56 @@
 namespace __ubsan {
   const char *TypeCheckKinds[] = {
     "load of", "store to", "reference binding to", "member access within",
-    "member call on", "constructor call on"
+    "member call on", "constructor call on", "downcast of", "downcast of"
   };
 }
 
-void __ubsan::__ubsan_handle_type_mismatch(TypeMismatchData *Data,
-                                           ValueHandle Pointer) {
+static void handleTypeMismatchImpl(TypeMismatchData *Data, ValueHandle Pointer,
+                                   Location FallbackLoc) {
+  Location Loc = Data->Loc.acquire();
+
+  // Use the SourceLocation from Data to track deduplication, even if 'invalid'
+  if (Loc.getSourceLocation().isDisabled())
+    return;
+  if (Data->Loc.isInvalid())
+    Loc = FallbackLoc;
+
   if (!Pointer)
-    Diag(Data->Loc, "%0 null pointer of type %1")
+    Diag(Loc, DL_Error, "%0 null pointer of type %1")
       << TypeCheckKinds[Data->TypeCheckKind] << Data->Type;
   else if (Data->Alignment && (Pointer & (Data->Alignment - 1)))
-    Diag(Data->Loc, "%0 misaligned address %1 for type %3, "
-                    "which requires %2 byte alignment")
+    Diag(Loc, DL_Error, "%0 misaligned address %1 for type %3, "
+                        "which requires %2 byte alignment")
       << TypeCheckKinds[Data->TypeCheckKind] << (void*)Pointer
       << Data->Alignment << Data->Type;
   else
-    Diag(Data->Loc, "%0 address %1 with insufficient space "
-                    "for an object of type %2")
+    Diag(Loc, DL_Error, "%0 address %1 with insufficient space "
+                        "for an object of type %2")
       << TypeCheckKinds[Data->TypeCheckKind] << (void*)Pointer << Data->Type;
+  if (Pointer)
+    Diag(Pointer, DL_Note, "pointer points here");
+}
+void __ubsan::__ubsan_handle_type_mismatch(TypeMismatchData *Data,
+                                           ValueHandle Pointer) {
+  handleTypeMismatchImpl(Data, Pointer, getCallerLocation());
 }
 void __ubsan::__ubsan_handle_type_mismatch_abort(TypeMismatchData *Data,
-                                                  ValueHandle Pointer) {
-  __ubsan_handle_type_mismatch(Data, Pointer);
+                                                 ValueHandle Pointer) {
+  handleTypeMismatchImpl(Data, Pointer, getCallerLocation());
   Die();
 }
 
 /// \brief Common diagnostic emission for various forms of integer overflow.
 template<typename T> static void HandleIntegerOverflow(OverflowData *Data,
-                                                      ValueHandle LHS,
-                                                      const char *Operator,
-                                                      T RHS) {
-  Diag(Data->Loc, "%0 integer overflow: "
-                  "%1 %2 %3 cannot be represented in type %4")
+                                                       ValueHandle LHS,
+                                                       const char *Operator,
+                                                       T RHS) {
+  SourceLocation Loc = Data->Loc.acquire();
+  if (Loc.isDisabled())
+    return;
+
+  Diag(Loc, DL_Error, "%0 integer overflow: "
+                      "%1 %2 %3 cannot be represented in type %4")
     << (Data->Type.isSignedIntegerTy() ? "signed" : "unsigned")
     << Value(Data->Type, LHS) << Operator << RHS << Data->Type;
 }
@@ -93,9 +111,19 @@
 
 void __ubsan::__ubsan_handle_negate_overflow(OverflowData *Data,
                                              ValueHandle OldVal) {
-  Diag(Data->Loc, "negation of %0 cannot be represented in type %1; "
-                  "cast to an unsigned type to negate this value to itself")
-    << Value(Data->Type, OldVal) << Data->Type;
+  SourceLocation Loc = Data->Loc.acquire();
+  if (Loc.isDisabled())
+    return;
+
+  if (Data->Type.isSignedIntegerTy())
+    Diag(Loc, DL_Error,
+         "negation of %0 cannot be represented in type %1; "
+         "cast to an unsigned type to negate this value to itself")
+      << Value(Data->Type, OldVal) << Data->Type;
+  else
+    Diag(Loc, DL_Error,
+         "negation of %0 cannot be represented in type %1")
+      << Value(Data->Type, OldVal) << Data->Type;
 }
 void __ubsan::__ubsan_handle_negate_overflow_abort(OverflowData *Data,
                                                     ValueHandle OldVal) {
@@ -105,13 +133,18 @@
 
 void __ubsan::__ubsan_handle_divrem_overflow(OverflowData *Data,
                                              ValueHandle LHS, ValueHandle RHS) {
+  SourceLocation Loc = Data->Loc.acquire();
+  if (Loc.isDisabled())
+    return;
+
   Value LHSVal(Data->Type, LHS);
   Value RHSVal(Data->Type, RHS);
   if (RHSVal.isMinusOne())
-    Diag(Data->Loc, "division of %0 by -1 cannot be represented in type %1")
+    Diag(Loc, DL_Error,
+         "division of %0 by -1 cannot be represented in type %1")
       << LHSVal << Data->Type;
   else
-    Diag(Data->Loc, "division by zero");
+    Diag(Loc, DL_Error, "division by zero");
 }
 void __ubsan::__ubsan_handle_divrem_overflow_abort(OverflowData *Data,
                                                     ValueHandle LHS,
@@ -123,18 +156,24 @@
 void __ubsan::__ubsan_handle_shift_out_of_bounds(ShiftOutOfBoundsData *Data,
                                                  ValueHandle LHS,
                                                  ValueHandle RHS) {
+  SourceLocation Loc = Data->Loc.acquire();
+  if (Loc.isDisabled())
+    return;
+
   Value LHSVal(Data->LHSType, LHS);
   Value RHSVal(Data->RHSType, RHS);
   if (RHSVal.isNegative())
-    Diag(Data->Loc, "shift exponent %0 is negative") << RHSVal;
+    Diag(Loc, DL_Error, "shift exponent %0 is negative") << RHSVal;
   else if (RHSVal.getPositiveIntValue() >= Data->LHSType.getIntegerBitWidth())
-    Diag(Data->Loc, "shift exponent %0 is too large for %1-bit type %2")
+    Diag(Loc, DL_Error,
+         "shift exponent %0 is too large for %1-bit type %2")
       << RHSVal << Data->LHSType.getIntegerBitWidth() << Data->LHSType;
   else if (LHSVal.isNegative())
-    Diag(Data->Loc, "left shift of negative value %0") << LHSVal;
+    Diag(Loc, DL_Error, "left shift of negative value %0") << LHSVal;
   else
-    Diag(Data->Loc, "left shift of %0 by %1 places cannot be represented "
-                    "in type %2") << LHSVal << RHSVal << Data->LHSType;
+    Diag(Loc, DL_Error,
+         "left shift of %0 by %1 places cannot be represented in type %2")
+      << LHSVal << RHSVal << Data->LHSType;
 }
 void __ubsan::__ubsan_handle_shift_out_of_bounds_abort(
                                                      ShiftOutOfBoundsData *Data,
@@ -144,21 +183,42 @@
   Die();
 }
 
+void __ubsan::__ubsan_handle_out_of_bounds(OutOfBoundsData *Data,
+                                           ValueHandle Index) {
+  // Report only when the acquired location is not disabled.
+  SourceLocation ReportLoc = Data->Loc.acquire();
+  if (!ReportLoc.isDisabled()) {
+    Diag(ReportLoc, DL_Error, "index %0 out of bounds for type %1")
+      << Value(Data->IndexType, Index) << Data->ArrayType;
+  }
+}
+
+void __ubsan::__ubsan_handle_out_of_bounds_abort(OutOfBoundsData *Data,
+                                                 ValueHandle Index) {
+  __ubsan_handle_out_of_bounds(Data, Index);
+  Die();
+}
+
 void __ubsan::__ubsan_handle_builtin_unreachable(UnreachableData *Data) {
-  Diag(Data->Loc, "execution reached a __builtin_unreachable() call");
+  Diag(Data->Loc, DL_Error, "execution reached a __builtin_unreachable() call");
   Die();
 }
 
 void __ubsan::__ubsan_handle_missing_return(UnreachableData *Data) {
-  Diag(Data->Loc, "execution reached the end of a value-returning function "
-                  "without returning a value");
+  Diag(Data->Loc, DL_Error,
+       "execution reached the end of a value-returning function "
+       "without returning a value");
   Die();
 }
 
 void __ubsan::__ubsan_handle_vla_bound_not_positive(VLABoundData *Data,
                                                     ValueHandle Bound) {
-  Diag(Data->Loc, "variable length array bound evaluates to "
-                  "non-positive value %0")
+  SourceLocation Loc = Data->Loc.acquire();
+  if (Loc.isDisabled())
+    return;
+
+  Diag(Loc, DL_Error, "variable length array bound evaluates to "
+                      "non-positive value %0")
     << Value(Data->Type, Bound);
 }
 void __ubsan::__ubsan_handle_vla_bound_not_positive_abort(VLABoundData *Data,
@@ -167,15 +227,34 @@
   Die();
 }
 
+
 void __ubsan::__ubsan_handle_float_cast_overflow(FloatCastOverflowData *Data,
                                                  ValueHandle From) {
-  Diag(SourceLocation(), "value %0 is outside the range of representable "
-                         "values of type %2")
+  // TODO: Add deduplication once a SourceLocation is generated for this check.
+  Diag(getCallerLocation(), DL_Error,
+       "value %0 is outside the range of representable values of type %2")
     << Value(Data->FromType, From) << Data->FromType << Data->ToType;
 }
 void __ubsan::__ubsan_handle_float_cast_overflow_abort(
                                                     FloatCastOverflowData *Data,
                                                     ValueHandle From) {
-  __ubsan_handle_float_cast_overflow(Data, From);
+  Diag(getCallerLocation(), DL_Error,
+       "value %0 is outside the range of representable values of type %2")
+    << Value(Data->FromType, From) << Data->FromType << Data->ToType;
+  Die();
+}
+
+void __ubsan::__ubsan_handle_load_invalid_value(InvalidValueData *Data,
+                                                ValueHandle Val) {
+  // TODO: Add deduplication once a SourceLocation is generated for this check.
+  Diag(getCallerLocation(), DL_Error,
+       "load of value %0, which is not a valid value for type %1")
+    << Value(Data->Type, Val) << Data->Type;
+}
+void __ubsan::__ubsan_handle_load_invalid_value_abort(InvalidValueData *Data,
+                                                      ValueHandle Val) {
+  Diag(getCallerLocation(), DL_Error,
+       "load of value %0, which is not a valid value for type %1")
+    << Value(Data->Type, Val) << Data->Type;
   Die();
 }
diff --git a/lib/ubsan/ubsan_handlers.h b/lib/ubsan/ubsan_handlers.h
index 5709fcf..5e237e1 100644
--- a/lib/ubsan/ubsan_handlers.h
+++ b/lib/ubsan/ubsan_handlers.h
@@ -25,8 +25,10 @@
 };
 
 #define RECOVERABLE(checkname, ...) \
-  extern "C" void __ubsan_handle_ ## checkname( __VA_ARGS__ ); \
-  extern "C" void __ubsan_handle_ ## checkname ## _abort( __VA_ARGS__ );
+  extern "C" SANITIZER_INTERFACE_ATTRIBUTE \
+    void __ubsan_handle_ ## checkname( __VA_ARGS__ ); \
+  extern "C" SANITIZER_INTERFACE_ATTRIBUTE \
+    void __ubsan_handle_ ## checkname ## _abort( __VA_ARGS__ );
 
 /// \brief Handle a runtime type check failure, caused by either a misaligned
 /// pointer, a null pointer, or a pointer to insufficient storage for the
@@ -65,14 +67,25 @@
 RECOVERABLE(shift_out_of_bounds, ShiftOutOfBoundsData *Data,
             ValueHandle LHS, ValueHandle RHS)
 
+struct OutOfBoundsData {
+  SourceLocation Loc;
+  const TypeDescriptor &ArrayType;
+  const TypeDescriptor &IndexType;
+};
+
+/// \brief Handle an array index out of bounds error.
+RECOVERABLE(out_of_bounds, OutOfBoundsData *Data, ValueHandle Index)
+
 struct UnreachableData {
   SourceLocation Loc;
 };
 
 /// \brief Handle a __builtin_unreachable which is reached.
-extern "C" void __ubsan_handle_builtin_unreachable(UnreachableData *Data);
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+void __ubsan_handle_builtin_unreachable(UnreachableData *Data);
 /// \brief Handle reaching the end of a value-returning function.
-extern "C" void __ubsan_handle_missing_return(UnreachableData *Data);
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+void __ubsan_handle_missing_return(UnreachableData *Data);
 
 struct VLABoundData {
   SourceLocation Loc;
@@ -91,6 +104,14 @@
 /// \brief Handle overflow in a conversion to or from a floating-point type.
 RECOVERABLE(float_cast_overflow, FloatCastOverflowData *Data, ValueHandle From)
 
+struct InvalidValueData {
+  // FIXME: SourceLocation Loc;
+  const TypeDescriptor &Type;
+};
+
+/// \brief Handle a load of an invalid value for the type.
+RECOVERABLE(load_invalid_value, InvalidValueData *Data, ValueHandle Val)
+
 }
 
 #endif // UBSAN_HANDLERS_H
diff --git a/lib/ubsan/ubsan_handlers_cxx.cc b/lib/ubsan/ubsan_handlers_cxx.cc
index 593fe13..b6cddef 100644
--- a/lib/ubsan/ubsan_handlers_cxx.cc
+++ b/lib/ubsan/ubsan_handlers_cxx.cc
@@ -27,34 +27,48 @@
 }
 
 static void HandleDynamicTypeCacheMiss(
-  DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash,
-  bool abort) {
+    DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash,
+    bool Abort) {
   if (checkDynamicType((void*)Pointer, Data->TypeInfo, Hash))
     // Just a cache miss. The type matches after all.
     return;
 
-  Diag(Data->Loc, "%0 address %1 which does not point to an object of type %2")
+  SourceLocation Loc = Data->Loc.acquire();
+  if (Loc.isDisabled())
+    return;
+
+  Diag(Loc, DL_Error,
+       "%0 address %1 which does not point to an object of type %2")
     << TypeCheckKinds[Data->TypeCheckKind] << (void*)Pointer << Data->Type;
-  // FIXME: If possible, say what type it actually points to. Produce a note
-  //        pointing out the vptr:
-  // lib/VMCore/Instructions.cpp:2020:10: runtime error: member call on address
-  //       0xb7a4440 which does not point to an object of type
-  //       'llvm::OverflowingBinaryOperator'
-  //   return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
-  //                                               ^
-  // 0xb7a4440: note: object is of type 'llvm::BinaryOperator'
-  //   00 00 00 00  e0 f7 c5 09 00 00 00 00  20 00 00 00
-  //                ^~~~~~~~~~~
-  //                vptr for 'llvm::BinaryOperator'
-  if (abort)
+
+  // If possible, say what type it actually points to.
+  DynamicTypeInfo DTI = getDynamicTypeInfo((void*)Pointer);
+  if (!DTI.isValid())
+    Diag(Pointer, DL_Note, "object has invalid vptr")
+      << MangledName(DTI.getMostDerivedTypeName())
+      << Range(Pointer, Pointer + sizeof(uptr), "invalid vptr");
+  else if (!DTI.getOffset())
+    Diag(Pointer, DL_Note, "object is of type %0")
+      << MangledName(DTI.getMostDerivedTypeName())
+      << Range(Pointer, Pointer + sizeof(uptr), "vptr for %0");
+  else
+    // FIXME: Find the type at the specified offset, and include that
+    //        in the note.
+    Diag(Pointer - DTI.getOffset(), DL_Note,
+         "object is base class subobject at offset %0 within object of type %1")
+      << DTI.getOffset() << MangledName(DTI.getMostDerivedTypeName())
+      << MangledName(DTI.getSubobjectTypeName())
+      << Range(Pointer, Pointer + sizeof(uptr), "vptr for %2 base class of %1");
+
+  if (Abort)
     Die();
 }
 
 void __ubsan::__ubsan_handle_dynamic_type_cache_miss(
-  DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash) {
+    DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash) {
   HandleDynamicTypeCacheMiss(Data, Pointer, Hash, false);
 }
 void __ubsan::__ubsan_handle_dynamic_type_cache_miss_abort(
-  DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash) {
+    DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash) {
   HandleDynamicTypeCacheMiss(Data, Pointer, Hash, true);
 }
diff --git a/lib/ubsan/ubsan_handlers_cxx.h b/lib/ubsan/ubsan_handlers_cxx.h
index 0fbcafb..cb1bca7 100644
--- a/lib/ubsan/ubsan_handlers_cxx.h
+++ b/lib/ubsan/ubsan_handlers_cxx.h
@@ -28,9 +28,11 @@
 /// \brief Handle a runtime type check failure, caused by an incorrect vptr.
 /// When this handler is called, all we know is that the type was not in the
 /// cache; this does not necessarily imply the existence of a bug.
-extern "C" void __ubsan_handle_dynamic_type_cache_miss(
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+void __ubsan_handle_dynamic_type_cache_miss(
   DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash);
-extern "C" void __ubsan_handle_dynamic_type_cache_miss_abort(
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+void __ubsan_handle_dynamic_type_cache_miss_abort(
   DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash);
 
 }
diff --git a/lib/ubsan/ubsan_type_hash.cc b/lib/ubsan/ubsan_type_hash.cc
index 1f6a3db..7a9cd28 100644
--- a/lib/ubsan/ubsan_type_hash.cc
+++ b/lib/ubsan/ubsan_type_hash.cc
@@ -25,7 +25,7 @@
   class type_info {
   public:
     virtual ~type_info();
-  private:
+
     const char *__type_name;
   };
 }
@@ -129,7 +129,7 @@
     // No base class subobjects.
     return false;
 
-  // Look for a zero-offset base class which is derived from \p Base.
+  // Look for a base class which is derived from \p Base at the right offset.
   for (unsigned int base = 0; base != VTI->base_count; ++base) {
     // FIXME: Curtail the recursion if this base can't possibly contain the
     //        given offset.
@@ -149,6 +149,39 @@
   return false;
 }
 
+/// \brief Find the derived-most dynamic base class of \p Derived at offset
+/// \p Offset, or 0 if there is none. Virtual bases are not searched.
+static const abi::__class_type_info *findBaseAtOffset(
+    const abi::__class_type_info *Derived, sptr Offset) {
+  if (!Offset)
+    return Derived;
+
+  if (const abi::__si_class_type_info *SI =
+        dynamic_cast<const abi::__si_class_type_info*>(Derived))
+    return findBaseAtOffset(SI->__base_type, Offset);
+
+  const abi::__vmi_class_type_info *VTI =
+    dynamic_cast<const abi::__vmi_class_type_info*>(Derived);
+  if (!VTI)
+    // No base class subobjects.
+    return 0;
+
+  for (unsigned int base = 0; base != VTI->base_count; ++base) {
+    sptr OffsetHere = VTI->base_info[base].__offset_flags >>
+                      abi::__base_class_type_info::__offset_shift;
+    if (VTI->base_info[base].__offset_flags &
+          abi::__base_class_type_info::__virtual_mask)
+      // FIXME: Can't handle virtual bases yet.
+      continue;
+    if (const abi::__class_type_info *Base =
+          findBaseAtOffset(VTI->base_info[base].__base_type,
+                           Offset - OffsetHere))
+      return Base;
+  }
+
+  return 0;
+}
+
 namespace {
 
 struct VtablePrefix {
@@ -160,8 +193,14 @@
   std::type_info *TypeInfo;
 };
 VtablePrefix *getVtablePrefix(void *Object) {
-  VtablePrefix **Ptr = reinterpret_cast<VtablePrefix**>(Object);
-  return *Ptr - 1;
+  VtablePrefix **VptrPtr = reinterpret_cast<VtablePrefix**>(Object);
+  if (!*VptrPtr)
+    return 0;
+  VtablePrefix *Prefix = *VptrPtr - 1;
+  if (Prefix->Offset > 0 || !Prefix->TypeInfo)
+    // This can't possibly be a valid vtable.
+    return 0;
+  return Prefix;
 }
 
 }
@@ -178,8 +217,7 @@
   }
 
   VtablePrefix *Vtable = getVtablePrefix(Object);
-  if (Vtable + 1 == 0 || Vtable->Offset > 0)
-    // This can't possibly be a valid vtable.
+  if (!Vtable)
     return false;
 
   // Check that this is actually a type_info object for a class type.
@@ -197,3 +235,14 @@
   *Bucket = Hash;
   return true;
 }
+
+__ubsan::DynamicTypeInfo __ubsan::getDynamicTypeInfo(void *Object) {
+  VtablePrefix *Vtable = getVtablePrefix(Object);
+  if (!Vtable)
+    return DynamicTypeInfo(0, 0, 0);
+  const abi::__class_type_info *ObjectType = findBaseAtOffset(
+    static_cast<const abi::__class_type_info*>(Vtable->TypeInfo),
+    -Vtable->Offset);
+  return DynamicTypeInfo(Vtable->TypeInfo->__type_name, -Vtable->Offset,
+                         ObjectType ? ObjectType->__type_name : "<unknown>");
+}
diff --git a/lib/ubsan/ubsan_type_hash.h b/lib/ubsan/ubsan_type_hash.h
index ac1be49..58ecd3d 100644
--- a/lib/ubsan/ubsan_type_hash.h
+++ b/lib/ubsan/ubsan_type_hash.h
@@ -19,6 +19,30 @@
 
 typedef uptr HashValue;
 
+/// \brief Information about the dynamic type of an object (extracted from its
+/// vptr).
+class DynamicTypeInfo {
+  const char *MostDerivedTypeName;
+  sptr Offset;
+  const char *SubobjectTypeName;
+
+public:
+  DynamicTypeInfo(const char *MDTN, sptr Offset, const char *STN)
+    : MostDerivedTypeName(MDTN), Offset(Offset), SubobjectTypeName(STN) {}
+
+  /// Determine whether the object had a valid dynamic type.
+  bool isValid() const { return MostDerivedTypeName; }
+  /// Get the name of the most-derived type of the object.
+  const char *getMostDerivedTypeName() const { return MostDerivedTypeName; }
+  /// Get the offset from the most-derived type to this base class.
+  sptr getOffset() const { return Offset; }
+  /// Get the name of the most-derived type at the specified offset.
+  const char *getSubobjectTypeName() const { return SubobjectTypeName; }
+};
+
+/// \brief Get information about the dynamic type of an object.
+DynamicTypeInfo getDynamicTypeInfo(void *Object);
+
 /// \brief Check whether the dynamic type of \p Object has a \p Type subobject
 /// at offset 0.
 /// \return \c true if the type matches, \c false if not.
@@ -31,7 +55,8 @@
 /// \code
 ///   __ubsan_vptr_type_cache[Hash % VptrTypeCacheSize] == Hash
 /// \endcode
-extern "C" HashValue __ubsan_vptr_type_cache[VptrTypeCacheSize];
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+HashValue __ubsan_vptr_type_cache[VptrTypeCacheSize];
 
 } // namespace __ubsan
 
diff --git a/lib/ubsan/ubsan_value.h b/lib/ubsan/ubsan_value.h
index 21313fc..e673f7a 100644
--- a/lib/ubsan/ubsan_value.h
+++ b/lib/ubsan/ubsan_value.h
@@ -20,6 +20,7 @@
 #error "UBSan not supported for this platform!"
 #endif
 
+#include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 
 // FIXME: Move this out to a config header.
@@ -46,7 +47,6 @@
 /// \brief Largest floating-point type we support.
 typedef long double FloatMax;
 
-
 /// \brief A description of a source location. This corresponds to Clang's
 /// \c PresumedLoc type.
 class SourceLocation {
@@ -62,6 +62,21 @@
   /// \brief Determine whether the source location is known.
   bool isInvalid() const { return !Filename; }
 
+  /// \brief Atomically acquire a copy, disabling original in-place.
+  /// Exactly one call to acquire() returns a copy that isn't disabled.
+  SourceLocation acquire() {
+    u32 OldColumn = __sanitizer::atomic_exchange(
+                        (__sanitizer::atomic_uint32_t *)&Column, ~u32(0),
+                        __sanitizer::memory_order_relaxed);
+    return SourceLocation(Filename, Line, OldColumn);
+  }
+
+  /// \brief Determine if this Location has been disabled, i.e. whether its
+  /// Column holds the ~u32(0) marker. Disabled locations are invalid to use.
+  bool isDisabled() const {
+    return Column == ~u32(0);
+  }
+
   /// \brief Get the presumed filename for the source location.
   const char *getFilename() const { return Filename; }
   /// \brief Get the presumed line number.
diff --git a/lib/ucmpdi2.c b/lib/ucmpdi2.c
index 3242bbf..40af236 100644
--- a/lib/ucmpdi2.c
+++ b/lib/ucmpdi2.c
@@ -36,3 +36,16 @@
         return 2;
     return 1;
 }
+
+#ifdef __ARM_EABI__
+/* ARM EABI unsigned 64-bit three-way compare; operands are unsigned.
+ * Returns: -1 if (a < b), 0 if (a == b), 1 if (a > b).
+ * __ucmpdi2 yields 0/1/2 for less/equal/greater, hence the -1 bias.
+ */
+COMPILER_RT_ABI si_int
+__aeabi_ulcmp(du_int a, du_int b)
+{
+    return __ucmpdi2(a, b) - 1;
+}
+#endif
+
diff --git a/make/AppleBI.mk b/make/AppleBI.mk
index b5e702b..bb78853 100644
--- a/make/AppleBI.mk
+++ b/make/AppleBI.mk
@@ -57,7 +57,13 @@
 	   $(OBJROOT)/version.c -arch $* -dynamiclib \
 	   -install_name /usr/lib/system/libcompiler_rt.dylib \
 	   -compatibility_version 1 -current_version $(RC_ProjectSourceVersion) \
-	   -nodefaultlibs -lSystem -umbrella System -dead_strip \
+	   -nodefaultlibs -umbrella System -dead_strip \
+	   -Wl,-upward-lunwind \
+	   -Wl,-upward-lsystem_m \
+	   -Wl,-upward-lsystem_c \
+	   -Wl,-ldyld \
+	   -Wl,-lsystem_kernel \
+	   -L$(SDKROOT)/usr/lib/system \
 	   $(DYLIB_FLAGS) -Wl,-force_load,$^ -o $@ 
 
 # Rule to make fat dylib
diff --git a/make/platform/clang_darwin.mk b/make/platform/clang_darwin.mk
index fe84a05..5179ce7 100644
--- a/make/platform/clang_darwin.mk
+++ b/make/platform/clang_darwin.mk
@@ -83,12 +83,14 @@
 # object files. If we are on that platform, strip out all ARM archs. We still
 # build the libraries themselves so that Clang can find them where it expects
 # them, even though they might not have an expected slice.
+ifneq ($(shell which sw_vers 2>/dev/null),)
 ifneq ($(shell sw_vers -productVersion | grep 10.6),)
 UniversalArchs.ios := $(filter-out armv7, $(UniversalArchs.ios))
 UniversalArchs.cc_kext := $(filter-out armv7, $(UniversalArchs.cc_kext))
 UniversalArchs.cc_kext_ios5 := $(filter-out armv7, $(UniversalArchs.cc_kext_ios5))
 UniversalArchs.profile_ios := $(filter-out armv7, $(UniversalArchs.profile_ios))
 endif
+endif
 
 # If RC_SUPPORTED_ARCHS is defined, treat it as a list of the architectures we
 # are intended to support and limit what we try to build to that.
@@ -129,10 +131,12 @@
 CFLAGS.eprintf		:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
 CFLAGS.10.4		:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
 # FIXME: We can't build ASAN with our stub SDK yet.
-CFLAGS.asan_osx         := $(CFLAGS) -mmacosx-version-min=10.5 -fno-builtin
+CFLAGS.asan_osx         := $(CFLAGS) -mmacosx-version-min=10.5 -fno-builtin \
+                           -DASAN_FLEXIBLE_MAPPING_AND_OFFSET=1
 CFLAGS.asan_osx_dynamic := \
 	$(CFLAGS) -mmacosx-version-min=10.5 -fno-builtin \
-	-DMAC_INTERPOSE_FUNCTIONS=1
+	-DMAC_INTERPOSE_FUNCTIONS=1 \
+  -DASAN_FLEXIBLE_MAPPING_AND_OFFSET=1
 
 CFLAGS.ubsan_osx	:= $(CFLAGS) -mmacosx-version-min=10.5 -fno-builtin
 
@@ -165,7 +169,7 @@
 
 # Configure the asan_osx_dynamic library to be built shared.
 SHARED_LIBRARY.asan_osx_dynamic := 1
-LDFLAGS.asan_osx_dynamic := -framework Foundation -lstdc++
+LDFLAGS.asan_osx_dynamic := -framework Foundation -lstdc++ -undefined dynamic_lookup
 
 FUNCTIONS.eprintf := eprintf
 FUNCTIONS.10.4 := eprintf floatundidf floatundisf floatundixf
diff --git a/make/platform/clang_linux.mk b/make/platform/clang_linux.mk
index 1f73145..89f7268 100644
--- a/make/platform/clang_linux.mk
+++ b/make/platform/clang_linux.mk
@@ -60,11 +60,13 @@
 
 # Build runtime libraries for x86_64.
 ifeq ($(call contains,$(SupportedArches),x86_64),true)
-Configs += full-x86_64 profile-x86_64 asan-x86_64 tsan-x86_64 ubsan-x86_64
+Configs += full-x86_64 profile-x86_64 asan-x86_64 tsan-x86_64 msan-x86_64 \
+           ubsan-x86_64
 Arch.full-x86_64 := x86_64
 Arch.profile-x86_64 := x86_64
 Arch.asan-x86_64 := x86_64
 Arch.tsan-x86_64 := x86_64
+Arch.msan-x86_64 := x86_64
 Arch.ubsan-x86_64 := x86_64
 endif
 
@@ -84,9 +86,12 @@
 CFLAGS.full-x86_64 := $(CFLAGS) -m64
 CFLAGS.profile-i386 := $(CFLAGS) -m32
 CFLAGS.profile-x86_64 := $(CFLAGS) -m64
-CFLAGS.asan-i386 := $(CFLAGS) -m32 -fPIE -fno-builtin
-CFLAGS.asan-x86_64 := $(CFLAGS) -m64 -fPIE -fno-builtin
+CFLAGS.asan-i386 := $(CFLAGS) -m32 -fPIE -fno-builtin \
+                    -DASAN_FLEXIBLE_MAPPING_AND_OFFSET=1
+CFLAGS.asan-x86_64 := $(CFLAGS) -m64 -fPIE -fno-builtin \
+                    -DASAN_FLEXIBLE_MAPPING_AND_OFFSET=1
 CFLAGS.tsan-x86_64 := $(CFLAGS) -m64 -fPIE -fno-builtin
+CFLAGS.msan-x86_64 := $(CFLAGS) -m64 -fPIE -fno-builtin
 CFLAGS.ubsan-i386 := $(CFLAGS) -m32 -fPIE -fno-builtin
 CFLAGS.ubsan-x86_64 := $(CFLAGS) -m64 -fPIE -fno-builtin
 
@@ -96,7 +101,8 @@
 	-B$(LLVM_ANDROID_TOOLCHAIN_DIR)
 CFLAGS.asan-arm-android := $(CFLAGS) -fPIC -fno-builtin \
 	$(ANDROID_COMMON_FLAGS) -mllvm -arm-enable-ehabi
-LDFLAGS.asan-arm-android := $(LDFLAGS) $(ANDROID_COMMON_FLAGS) -ldl
+LDFLAGS.asan-arm-android := $(LDFLAGS) $(ANDROID_COMMON_FLAGS) -ldl \
+	-Wl,-soname=libclang_rt.asan-arm-android.so
 
 # Use our stub SDK as the sysroot to support more portable building. For now we
 # just do this for the non-ASAN modules, because the stub SDK doesn't have
@@ -118,6 +124,8 @@
                                           $(SanitizerCommonFunctions)
 FUNCTIONS.tsan-x86_64 := $(TsanFunctions) $(InterceptionFunctions) \
                                           $(SanitizerCommonFunctions)
+FUNCTIONS.msan-x86_64 := $(MsanFunctions) $(InterceptionFunctions) \
+                                          $(SanitizerCommonFunctions)
 FUNCTIONS.ubsan-i386 := $(UbsanFunctions) $(SanitizerCommonFunctions)
 FUNCTIONS.ubsan-x86_64 := $(UbsanFunctions) $(SanitizerCommonFunctions)