Updating branches/google/stable to r169803

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/google/stable@170219 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/.arcconfig b/.arcconfig
new file mode 100644
index 0000000..413b70b
--- /dev/null
+++ b/.arcconfig
@@ -0,0 +1,4 @@
+{
+  "project_id" : "compiler-rt",
+  "conduit_uri" : "http://llvm-reviews.chandlerc.com/"
+}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d5f6696..4544f15 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,6 +72,14 @@
 if(NOT WIN32)
   list(APPEND SANITIZER_COMMON_CFLAGS -fvisibility=hidden)
 endif()
+# Build sanitizer runtimes with debug info.
+check_cxx_compiler_flag(-gline-tables-only SUPPORTS_GLINE_TABLES_ONLY_FLAG)
+if(SUPPORTS_GLINE_TABLES_ONLY_FLAG)
+  list(APPEND SANITIZER_COMMON_CFLAGS -gline-tables-only)
+else()
+  list(APPEND SANITIZER_COMMON_CFLAGS -g)
+endif()
+# Warnings suppressions.
 check_cxx_compiler_flag(-Wno-variadic-macros SUPPORTS_NO_VARIADIC_MACROS_FLAG)
 if(SUPPORTS_NO_VARIADIC_MACROS_FLAG)
   list(APPEND SANITIZER_COMMON_CFLAGS -Wno-variadic-macros)
@@ -138,9 +146,6 @@
 # Add the public header's directory to the includes for all of compiler-rt.
 include_directories(include)
 
-# Build utils before building compiler-rt library.
-add_subdirectory(utils)
-
 add_subdirectory(lib)
 
 if(LLVM_INCLUDE_TESTS)
diff --git a/SDKs/darwin/usr/include/stdio.h b/SDKs/darwin/usr/include/stdio.h
index 7441388..63b10a8 100644
--- a/SDKs/darwin/usr/include/stdio.h
+++ b/SDKs/darwin/usr/include/stdio.h
@@ -17,6 +17,10 @@
 #ifndef __STDIO_H__
 #define __STDIO_H__
 
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 typedef struct __sFILE FILE;
 typedef __SIZE_TYPE__ size_t;
 
@@ -63,12 +67,18 @@
 
 int fclose(FILE *);
 int fflush(FILE *);
-FILE *fopen(const char * restrict, const char * restrict) __asm(__FOPEN_NAME);
-int fprintf(FILE * restrict, const char * restrict, ...);
-size_t fwrite(const void * restrict, size_t, size_t, FILE * restrict)
+FILE *fopen(const char * __restrict, const char * __restrict) __asm(__FOPEN_NAME);
+int fprintf(FILE * __restrict, const char * __restrict, ...);
+size_t fwrite(const void * __restrict, size_t, size_t, FILE * __restrict)
   __asm(__FWRITE_NAME);
 size_t fread(void * __restrict, size_t, size_t, FILE * __restrict);
 long ftell(FILE *);
 int fseek(FILE *, long, int);
 
+int snprintf(char * __restrict, size_t, const char * __restrict, ...);
+
+#if defined(__cplusplus)
+}
+#endif
+
 #endif /* __STDIO_H__ */
diff --git a/include/sanitizer/asan_interface.h b/include/sanitizer/asan_interface.h
index 788e90f..5b6a909 100644
--- a/include/sanitizer/asan_interface.h
+++ b/include/sanitizer/asan_interface.h
@@ -66,6 +66,15 @@
   void __asan_stack_free(uptr ptr, uptr size, uptr real_stack)
       SANITIZER_INTERFACE_ATTRIBUTE;
 
+  // These two functions are used by instrumented code in the
+  // use-after-scope mode. They mark memory for local variables as
+  // unaddressable when they leave scope and addressable before the
+  // function exits.
+  void __asan_poison_stack_memory(uptr addr, uptr size)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+  void __asan_unpoison_stack_memory(uptr addr, uptr size)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
   // Marks memory region [addr, addr+size) as unaddressable.
   // This memory must be previously allocated by the user program. Accessing
   // addresses in this region from instrumented code is forbidden until
@@ -131,7 +140,7 @@
   // User may provide function that would be called right when ASan detects
   // an error. This can be used to notice cases when ASan detects an error, but
   // the program crashes before ASan report is printed.
-  void __asan_on_error()
+  /* OPTIONAL */ void __asan_on_error()
       SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
 
   // User may provide its own implementation for symbolization function.
@@ -139,7 +148,8 @@
   // "out_buffer". Description should be at most "out_size" bytes long.
   // User-specified function should return true if symbolization was
   // successful.
-  bool __asan_symbolize(const void *pc, char *out_buffer, int out_size)
+  /* OPTIONAL */ bool __asan_symbolize(const void *pc, char *out_buffer,
+                                       int out_size)
       SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
 
   // Returns the estimated number of bytes that will be reserved by allocator
@@ -179,20 +189,19 @@
   void __asan_print_accumulated_stats()
       SANITIZER_INTERFACE_ATTRIBUTE;
 
-  // This function may be overriden by user to provide a string containing
-  // ASan runtime options. See asan_flags.h for details.
-  const char* __asan_default_options()
+  // This function may be optionally provided by user and should return
+  // a string containing ASan runtime options. See asan_flags.h for details.
+  /* OPTIONAL */ const char* __asan_default_options()
       SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
 
-  // Malloc hooks that may be overriden by user.
+  // Malloc hooks that may be optionally provided by user.
   // __asan_malloc_hook(ptr, size) is called immediately after
   //   allocation of "size" bytes, which returned "ptr".
   // __asan_free_hook(ptr) is called immediately before
   //   deallocation of "ptr".
-  // If user doesn't provide implementations of these hooks, they are no-op.
-  void __asan_malloc_hook(void *ptr, uptr size)
+  /* OPTIONAL */ void __asan_malloc_hook(void *ptr, uptr size)
       SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
-  void __asan_free_hook(void *ptr)
+  /* OPTIONAL */ void __asan_free_hook(void *ptr)
       SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
 }  // extern "C"
 
diff --git a/include/sanitizer/common_interface_defs.h b/include/sanitizer/common_interface_defs.h
index cbce63f..9d8fa55 100644
--- a/include/sanitizer/common_interface_defs.h
+++ b/include/sanitizer/common_interface_defs.h
@@ -30,6 +30,12 @@
 # define SANITIZER_WEAK_ATTRIBUTE  __attribute__((weak))
 #endif
 
+#ifdef __linux__
+# define SANITIZER_SUPPORTS_WEAK_HOOKS 1
+#else
+# define SANITIZER_SUPPORTS_WEAK_HOOKS 0
+#endif
+
 // __has_feature
 #if !defined(__has_feature)
 # define __has_feature(x) 0
@@ -48,6 +54,13 @@
 typedef unsigned long uptr;  // NOLINT
 typedef signed   long sptr;  // NOLINT
 #endif  // defined(_WIN64)
+#if defined(__x86_64__)
+// Since x32 uses the ILP32 data model in 64-bit hardware mode, we must use
+// a 64-bit pointer to unwind stack frames.
+typedef unsigned long long uhwptr;  // NOLINT
+#else
+typedef uptr uhwptr;   // NOLINT
+#endif
 typedef unsigned char u8;
 typedef unsigned short u16;  // NOLINT
 typedef unsigned int u32;
@@ -63,6 +76,17 @@
   // Tell the tools to write their reports to "path.<pid>" instead of stderr.
   void __sanitizer_set_report_path(const char *path)
       SANITIZER_INTERFACE_ATTRIBUTE;
+
+  // Tell the tools to write their reports to given file descriptor instead of
+  // stderr.
+  void __sanitizer_set_report_fd(int fd)
+      SANITIZER_INTERFACE_ATTRIBUTE;
+
+  // Notify the tools that the sandbox is going to be turned on. The reserved
+  // parameter will be used in the future to hold a structure with functions
+  // that the tools may call to bypass the sandbox.
+  void __sanitizer_sandbox_on_notify(void *reserved)
+      SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE;
 }  // extern "C"
 
 #endif  // SANITIZER_COMMON_INTERFACE_DEFS_H
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 6febaf5..b7cd07e 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -6,12 +6,11 @@
   add_subdirectory(asan)
   add_subdirectory(interception)
   add_subdirectory(sanitizer_common)
+  add_subdirectory(ubsan)
 endif()
 if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
   # ThreadSanitizer is supported on Linux only.
   add_subdirectory(tsan)
-  # UndefinedBehaviorSanitizer has been tested on Linux only.
-  add_subdirectory(ubsan)
 endif()
 
 # FIXME: Add support for the profile library.
diff --git a/lib/Makefile.mk b/lib/Makefile.mk
index 791921a..ea471e0 100644
--- a/lib/Makefile.mk
+++ b/lib/Makefile.mk
@@ -19,6 +19,7 @@
 SubDirs += profile
 SubDirs += sanitizer_common
 SubDirs += tsan
+SubDirs += ubsan
 
 # FIXME: We don't currently support building an atomic library, and as it must
 # be a separate library from the runtime library, we need to remove its source
diff --git a/lib/asan/CMakeLists.txt b/lib/asan/CMakeLists.txt
index 884a718..0f561e6 100644
--- a/lib/asan/CMakeLists.txt
+++ b/lib/asan/CMakeLists.txt
@@ -2,6 +2,8 @@
 
 set(ASAN_SOURCES
   asan_allocator.cc
+  asan_allocator2.cc
+  asan_fake_stack.cc
   asan_globals.cc
   asan_interceptors.cc
   asan_linux.cc
diff --git a/lib/asan/asan_allocator.cc b/lib/asan/asan_allocator.cc
index de37137..4d422a7 100644
--- a/lib/asan/asan_allocator.cc
+++ b/lib/asan/asan_allocator.cc
@@ -24,8 +24,9 @@
 // Once freed, the body of the chunk contains the stack trace of the free call.
 //
 //===----------------------------------------------------------------------===//
-
 #include "asan_allocator.h"
+
+#if ASAN_ALLOCATOR_VERSION == 1
 #include "asan_interceptors.h"
 #include "asan_internal.h"
 #include "asan_lock.h"
@@ -58,43 +59,12 @@
 static const uptr kMallocSizeClassStep = 1UL << kMallocSizeClassStepLog;
 
 static const uptr kMaxAllowedMallocSize =
-    (__WORDSIZE == 32) ? 3UL << 30 : 8UL << 30;
+    (SANITIZER_WORDSIZE == 32) ? 3UL << 30 : 8UL << 30;
 
 static inline bool IsAligned(uptr a, uptr alignment) {
   return (a & (alignment - 1)) == 0;
 }
 
-static inline uptr Log2(uptr x) {
-  CHECK(IsPowerOfTwo(x));
-#if !defined(_WIN32) || defined(__clang__)
-  return __builtin_ctzl(x);
-#elif defined(_WIN64)
-  unsigned long ret;  // NOLINT
-  _BitScanForward64(&ret, x);
-  return ret;
-#else
-  unsigned long ret;  // NOLINT
-  _BitScanForward(&ret, x);
-  return ret;
-#endif
-}
-
-static inline uptr RoundUpToPowerOfTwo(uptr size) {
-  CHECK(size);
-  if (IsPowerOfTwo(size)) return size;
-
-  unsigned long up;  // NOLINT
-#if !defined(_WIN32) || defined(__clang__)
-  up = __WORDSIZE - 1 - __builtin_clzl(size);
-#elif defined(_WIN64)
-  _BitScanReverse64(&up, size);
-#else
-  _BitScanReverse(&up, size);
-#endif
-  CHECK(size < (1ULL << (up + 1)));
-  CHECK(size > (1ULL << up));
-  return 1UL << (up + 1);
-}
 
 static inline uptr SizeClassToSize(u8 size_class) {
   CHECK(size_class < kNumberOfSizeClasses);
@@ -132,7 +102,7 @@
 }
 
 static u8 *MmapNewPagesAndPoisonShadow(uptr size) {
-  CHECK(IsAligned(size, kPageSize));
+  CHECK(IsAligned(size, GetPageSizeCached()));
   u8 *res = (u8*)MmapOrDie(size, __FUNCTION__);
   PoisonShadow((uptr)res, size, kAsanHeapLeftRedzoneMagic);
   if (flags()->debug) {
@@ -534,12 +504,13 @@
     uptr mmap_size = Max(size, kMinMmapSize);
     uptr n_chunks = mmap_size / size;
     CHECK(n_chunks * size == mmap_size);
-    if (size < kPageSize) {
+    uptr PageSize = GetPageSizeCached();
+    if (size < PageSize) {
       // Size is small, just poison the last chunk.
       n_chunks--;
     } else {
       // Size is large, allocate an extra page at right and poison it.
-      mmap_size += kPageSize;
+      mmap_size += PageSize;
     }
     CHECK(n_chunks > 0);
     u8 *mem = MmapNewPagesAndPoisonShadow(mmap_size);
@@ -756,7 +727,8 @@
 
 }  // namespace __asan
 
-// Default (no-op) implementation of malloc hooks.
+#if !SANITIZER_SUPPORTS_WEAK_HOOKS
+// Provide default (no-op) implementation of malloc hooks.
 extern "C" {
 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
 void __asan_malloc_hook(void *ptr, uptr size) {
@@ -768,26 +740,27 @@
   (void)ptr;
 }
 }  // extern "C"
+#endif
 
 namespace __asan {
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void *asan_memalign(uptr alignment, uptr size, StackTrace *stack) {
   void *ptr = (void*)Allocate(alignment, size, stack);
-  __asan_malloc_hook(ptr, size);
+  ASAN_MALLOC_HOOK(ptr, size);
   return ptr;
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void asan_free(void *ptr, StackTrace *stack) {
-  __asan_free_hook(ptr);
+  ASAN_FREE_HOOK(ptr);
   Deallocate((u8*)ptr, stack);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void *asan_malloc(uptr size, StackTrace *stack) {
   void *ptr = (void*)Allocate(0, size, stack);
-  __asan_malloc_hook(ptr, size);
+  ASAN_MALLOC_HOOK(ptr, size);
   return ptr;
 }
 
@@ -795,17 +768,17 @@
   void *ptr = (void*)Allocate(0, nmemb * size, stack);
   if (ptr)
     REAL(memset)(ptr, 0, nmemb * size);
-  __asan_malloc_hook(ptr, nmemb * size);
+  ASAN_MALLOC_HOOK(ptr, nmemb * size);
   return ptr;
 }
 
 void *asan_realloc(void *p, uptr size, StackTrace *stack) {
   if (p == 0) {
     void *ptr = (void*)Allocate(0, size, stack);
-    __asan_malloc_hook(ptr, size);
+    ASAN_MALLOC_HOOK(ptr, size);
     return ptr;
   } else if (size == 0) {
-    __asan_free_hook(p);
+    ASAN_FREE_HOOK(p);
     Deallocate((u8*)p, stack);
     return 0;
   }
@@ -813,19 +786,20 @@
 }
 
 void *asan_valloc(uptr size, StackTrace *stack) {
-  void *ptr = (void*)Allocate(kPageSize, size, stack);
-  __asan_malloc_hook(ptr, size);
+  void *ptr = (void*)Allocate(GetPageSizeCached(), size, stack);
+  ASAN_MALLOC_HOOK(ptr, size);
   return ptr;
 }
 
 void *asan_pvalloc(uptr size, StackTrace *stack) {
-  size = RoundUpTo(size, kPageSize);
+  uptr PageSize = GetPageSizeCached();
+  size = RoundUpTo(size, PageSize);
   if (size == 0) {
     // pvalloc(0) should allocate one page.
-    size = kPageSize;
+    size = PageSize;
   }
-  void *ptr = (void*)Allocate(kPageSize, size, stack);
-  __asan_malloc_hook(ptr, size);
+  void *ptr = (void*)Allocate(PageSize, size, stack);
+  ASAN_MALLOC_HOOK(ptr, size);
   return ptr;
 }
 
@@ -833,7 +807,7 @@
                           StackTrace *stack) {
   void *ptr = Allocate(alignment, size, stack);
   CHECK(IsAligned((uptr)ptr, alignment));
-  __asan_malloc_hook(ptr, size);
+  ASAN_MALLOC_HOOK(ptr, size);
   *memptr = ptr;
   return 0;
 }
@@ -860,170 +834,11 @@
   malloc_info.ForceUnlock();
 }
 
-// ---------------------- Fake stack-------------------- {{{1
-FakeStack::FakeStack() {
-  CHECK(REAL(memset) != 0);
-  REAL(memset)(this, 0, sizeof(*this));
-}
-
-bool FakeStack::AddrIsInSizeClass(uptr addr, uptr size_class) {
-  uptr mem = allocated_size_classes_[size_class];
-  uptr size = ClassMmapSize(size_class);
-  bool res = mem && addr >= mem && addr < mem + size;
-  return res;
-}
-
-uptr FakeStack::AddrIsInFakeStack(uptr addr) {
-  for (uptr i = 0; i < kNumberOfSizeClasses; i++) {
-    if (AddrIsInSizeClass(addr, i)) return allocated_size_classes_[i];
-  }
-  return 0;
-}
-
-// We may want to compute this during compilation.
-inline uptr FakeStack::ComputeSizeClass(uptr alloc_size) {
-  uptr rounded_size = RoundUpToPowerOfTwo(alloc_size);
-  uptr log = Log2(rounded_size);
-  CHECK(alloc_size <= (1UL << log));
-  if (!(alloc_size > (1UL << (log-1)))) {
-    Printf("alloc_size %zu log %zu\n", alloc_size, log);
-  }
-  CHECK(alloc_size > (1UL << (log-1)));
-  uptr res = log < kMinStackFrameSizeLog ? 0 : log - kMinStackFrameSizeLog;
-  CHECK(res < kNumberOfSizeClasses);
-  CHECK(ClassSize(res) >= rounded_size);
-  return res;
-}
-
-void FakeFrameFifo::FifoPush(FakeFrame *node) {
-  CHECK(node);
-  node->next = 0;
-  if (first_ == 0 && last_ == 0) {
-    first_ = last_ = node;
-  } else {
-    CHECK(first_);
-    CHECK(last_);
-    last_->next = node;
-    last_ = node;
-  }
-}
-
-FakeFrame *FakeFrameFifo::FifoPop() {
-  CHECK(first_ && last_ && "Exhausted fake stack");
-  FakeFrame *res = 0;
-  if (first_ == last_) {
-    res = first_;
-    first_ = last_ = 0;
-  } else {
-    res = first_;
-    first_ = first_->next;
-  }
-  return res;
-}
-
-void FakeStack::Init(uptr stack_size) {
-  stack_size_ = stack_size;
-  alive_ = true;
-}
-
-void FakeStack::Cleanup() {
-  alive_ = false;
-  for (uptr i = 0; i < kNumberOfSizeClasses; i++) {
-    uptr mem = allocated_size_classes_[i];
-    if (mem) {
-      PoisonShadow(mem, ClassMmapSize(i), 0);
-      allocated_size_classes_[i] = 0;
-      UnmapOrDie((void*)mem, ClassMmapSize(i));
-    }
-  }
-}
-
-uptr FakeStack::ClassMmapSize(uptr size_class) {
-  return RoundUpToPowerOfTwo(stack_size_);
-}
-
-void FakeStack::AllocateOneSizeClass(uptr size_class) {
-  CHECK(ClassMmapSize(size_class) >= kPageSize);
-  uptr new_mem = (uptr)MmapOrDie(
-      ClassMmapSize(size_class), __FUNCTION__);
-  // Printf("T%d new_mem[%zu]: %p-%p mmap %zu\n",
-  //       asanThreadRegistry().GetCurrent()->tid(),
-  //       size_class, new_mem, new_mem + ClassMmapSize(size_class),
-  //       ClassMmapSize(size_class));
-  uptr i;
-  for (i = 0; i < ClassMmapSize(size_class);
-       i += ClassSize(size_class)) {
-    size_classes_[size_class].FifoPush((FakeFrame*)(new_mem + i));
-  }
-  CHECK(i == ClassMmapSize(size_class));
-  allocated_size_classes_[size_class] = new_mem;
-}
-
-uptr FakeStack::AllocateStack(uptr size, uptr real_stack) {
-  if (!alive_) return real_stack;
-  CHECK(size <= kMaxStackMallocSize && size > 1);
-  uptr size_class = ComputeSizeClass(size);
-  if (!allocated_size_classes_[size_class]) {
-    AllocateOneSizeClass(size_class);
-  }
-  FakeFrame *fake_frame = size_classes_[size_class].FifoPop();
-  CHECK(fake_frame);
-  fake_frame->size_minus_one = size - 1;
-  fake_frame->real_stack = real_stack;
-  while (FakeFrame *top = call_stack_.top()) {
-    if (top->real_stack > real_stack) break;
-    call_stack_.LifoPop();
-    DeallocateFrame(top);
-  }
-  call_stack_.LifoPush(fake_frame);
-  uptr ptr = (uptr)fake_frame;
-  PoisonShadow(ptr, size, 0);
-  return ptr;
-}
-
-void FakeStack::DeallocateFrame(FakeFrame *fake_frame) {
-  CHECK(alive_);
-  uptr size = fake_frame->size_minus_one + 1;
-  uptr size_class = ComputeSizeClass(size);
-  CHECK(allocated_size_classes_[size_class]);
-  uptr ptr = (uptr)fake_frame;
-  CHECK(AddrIsInSizeClass(ptr, size_class));
-  CHECK(AddrIsInSizeClass(ptr + size - 1, size_class));
-  size_classes_[size_class].FifoPush(fake_frame);
-}
-
-void FakeStack::OnFree(uptr ptr, uptr size, uptr real_stack) {
-  FakeFrame *fake_frame = (FakeFrame*)ptr;
-  CHECK(fake_frame->magic = kRetiredStackFrameMagic);
-  CHECK(fake_frame->descr != 0);
-  CHECK(fake_frame->size_minus_one == size - 1);
-  PoisonShadow(ptr, size, kAsanStackAfterReturnMagic);
-}
-
 }  // namespace __asan
 
 // ---------------------- Interface ---------------- {{{1
 using namespace __asan;  // NOLINT
 
-uptr __asan_stack_malloc(uptr size, uptr real_stack) {
-  if (!flags()->use_fake_stack) return real_stack;
-  AsanThread *t = asanThreadRegistry().GetCurrent();
-  if (!t) {
-    // TSD is gone, use the real stack.
-    return real_stack;
-  }
-  uptr ptr = t->fake_stack().AllocateStack(size, real_stack);
-  // Printf("__asan_stack_malloc %p %zu %p\n", ptr, size, real_stack);
-  return ptr;
-}
-
-void __asan_stack_free(uptr ptr, uptr size, uptr real_stack) {
-  if (!flags()->use_fake_stack) return;
-  if (ptr != real_stack) {
-    FakeStack::OnFree(ptr, size, real_stack);
-  }
-}
-
 // ASan allocator doesn't reserve extra bytes, so normally we would
 // just return "size".
 uptr __asan_get_estimated_allocated_size(uptr size) {
@@ -1045,3 +860,4 @@
   }
   return allocated_size;
 }
+#endif  // ASAN_ALLOCATOR_VERSION
diff --git a/lib/asan/asan_allocator.h b/lib/asan/asan_allocator.h
index 1c6c30b..1e936c3 100644
--- a/lib/asan/asan_allocator.h
+++ b/lib/asan/asan_allocator.h
@@ -18,6 +18,12 @@
 #include "asan_internal.h"
 #include "asan_interceptors.h"
 
+// We are in the process of transitioning from the old allocator (version 1)
+// to a new one (version 2). The change is quite intrusive so both allocators
+// will co-exist in the source base for a while. The actual allocator is chosen
+// at build time by redefining this macro.
+#define ASAN_ALLOCATOR_VERSION 1
+
 namespace __asan {
 
 static const uptr kNumberOfSizeClasses = 255;
@@ -175,5 +181,40 @@
 void asan_mz_force_lock();
 void asan_mz_force_unlock();
 
+// Log2 and RoundUpToPowerOfTwo should be inlined for performance.
+
+static inline uptr Log2(uptr x) {
+  CHECK(IsPowerOfTwo(x));
+#if !defined(_WIN32) || defined(__clang__)
+  return __builtin_ctzl(x);
+#elif defined(_WIN64)
+  unsigned long ret;  // NOLINT
+  _BitScanForward64(&ret, x);
+  return ret;
+#else
+  unsigned long ret;  // NOLINT
+  _BitScanForward(&ret, x);
+  return ret;
+#endif
+}
+
+static inline uptr RoundUpToPowerOfTwo(uptr size) {
+  CHECK(size);
+  if (IsPowerOfTwo(size)) return size;
+
+  unsigned long up;  // NOLINT
+#if !defined(_WIN32) || defined(__clang__)
+  up = SANITIZER_WORDSIZE - 1 - __builtin_clzl(size);
+#elif defined(_WIN64)
+  _BitScanReverse64(&up, size);
+#else
+  _BitScanReverse(&up, size);
+#endif
+  CHECK(size < (1ULL << (up + 1)));
+  CHECK(size > (1ULL << up));
+  return 1UL << (up + 1);
+}
+
+
 }  // namespace __asan
 #endif  // ASAN_ALLOCATOR_H
diff --git a/lib/asan/asan_allocator2.cc b/lib/asan/asan_allocator2.cc
new file mode 100644
index 0000000..bb3a15e
--- /dev/null
+++ b/lib/asan/asan_allocator2.cc
@@ -0,0 +1,43 @@
+//===-- asan_allocator2.cc ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Implementation of ASan's memory allocator, 2-nd version.
+// This variant uses the allocator from sanitizer_common, i.e. the one shared
+// with ThreadSanitizer and MemorySanitizer.
+//
+// Status: under development, not enabled by default yet.
+//===----------------------------------------------------------------------===//
+#include "asan_allocator.h"
+#if ASAN_ALLOCATOR_VERSION == 2
+
+#include "sanitizer_common/sanitizer_allocator.h"
+
+namespace __asan {
+
+#if SANITIZER_WORDSIZE == 64
+const uptr kAllocatorSpace = 0x600000000000ULL;
+const uptr kAllocatorSize  =  0x10000000000ULL;  // 1T.
+typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 0 /*metadata*/,
+    DefaultSizeClassMap> PrimaryAllocator;
+#elif SANITIZER_WORDSIZE == 32
+static const u64 kAddressSpaceSize = 1ULL << 32;
+typedef SizeClassAllocator32<
+  0, kAddressSpaceSize, 16, CompactSizeClassMap> PrimaryAllocator;
+#endif
+
+typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
+typedef LargeMmapAllocator SecondaryAllocator;
+typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
+    SecondaryAllocator> Allocator;
+
+
+}  // namespace __asan
+#endif  // ASAN_ALLOCATOR_VERSION
diff --git a/lib/asan/asan_fake_stack.cc b/lib/asan/asan_fake_stack.cc
new file mode 100644
index 0000000..7c5a163
--- /dev/null
+++ b/lib/asan/asan_fake_stack.cc
@@ -0,0 +1,182 @@
+//===-- asan_fake_stack.cc ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// FakeStack is used to detect use-after-return bugs.
+//===----------------------------------------------------------------------===//
+#include "asan_allocator.h"
+#include "asan_thread.h"
+#include "asan_thread_registry.h"
+#include "sanitizer/asan_interface.h"
+
+namespace __asan {
+
+FakeStack::FakeStack() {
+  CHECK(REAL(memset) != 0);
+  REAL(memset)(this, 0, sizeof(*this));
+}
+
+bool FakeStack::AddrIsInSizeClass(uptr addr, uptr size_class) {
+  uptr mem = allocated_size_classes_[size_class];
+  uptr size = ClassMmapSize(size_class);
+  bool res = mem && addr >= mem && addr < mem + size;
+  return res;
+}
+
+uptr FakeStack::AddrIsInFakeStack(uptr addr) {
+  for (uptr i = 0; i < kNumberOfSizeClasses; i++) {
+    if (AddrIsInSizeClass(addr, i)) return allocated_size_classes_[i];
+  }
+  return 0;
+}
+
+// We may want to compute this during compilation.
+inline uptr FakeStack::ComputeSizeClass(uptr alloc_size) {
+  uptr rounded_size = RoundUpToPowerOfTwo(alloc_size);
+  uptr log = Log2(rounded_size);
+  CHECK(alloc_size <= (1UL << log));
+  if (!(alloc_size > (1UL << (log-1)))) {
+    Printf("alloc_size %zu log %zu\n", alloc_size, log);
+  }
+  CHECK(alloc_size > (1UL << (log-1)));
+  uptr res = log < kMinStackFrameSizeLog ? 0 : log - kMinStackFrameSizeLog;
+  CHECK(res < kNumberOfSizeClasses);
+  CHECK(ClassSize(res) >= rounded_size);
+  return res;
+}
+
+void FakeFrameFifo::FifoPush(FakeFrame *node) {
+  CHECK(node);
+  node->next = 0;
+  if (first_ == 0 && last_ == 0) {
+    first_ = last_ = node;
+  } else {
+    CHECK(first_);
+    CHECK(last_);
+    last_->next = node;
+    last_ = node;
+  }
+}
+
+FakeFrame *FakeFrameFifo::FifoPop() {
+  CHECK(first_ && last_ && "Exhausted fake stack");
+  FakeFrame *res = 0;
+  if (first_ == last_) {
+    res = first_;
+    first_ = last_ = 0;
+  } else {
+    res = first_;
+    first_ = first_->next;
+  }
+  return res;
+}
+
+void FakeStack::Init(uptr stack_size) {
+  stack_size_ = stack_size;
+  alive_ = true;
+}
+
+void FakeStack::Cleanup() {
+  alive_ = false;
+  for (uptr i = 0; i < kNumberOfSizeClasses; i++) {
+    uptr mem = allocated_size_classes_[i];
+    if (mem) {
+      PoisonShadow(mem, ClassMmapSize(i), 0);
+      allocated_size_classes_[i] = 0;
+      UnmapOrDie((void*)mem, ClassMmapSize(i));
+    }
+  }
+}
+
+uptr FakeStack::ClassMmapSize(uptr size_class) {
+  return RoundUpToPowerOfTwo(stack_size_);
+}
+
+void FakeStack::AllocateOneSizeClass(uptr size_class) {
+  CHECK(ClassMmapSize(size_class) >= GetPageSizeCached());
+  uptr new_mem = (uptr)MmapOrDie(
+      ClassMmapSize(size_class), __FUNCTION__);
+  // Printf("T%d new_mem[%zu]: %p-%p mmap %zu\n",
+  //       asanThreadRegistry().GetCurrent()->tid(),
+  //       size_class, new_mem, new_mem + ClassMmapSize(size_class),
+  //       ClassMmapSize(size_class));
+  uptr i;
+  for (i = 0; i < ClassMmapSize(size_class);
+       i += ClassSize(size_class)) {
+    size_classes_[size_class].FifoPush((FakeFrame*)(new_mem + i));
+  }
+  CHECK(i == ClassMmapSize(size_class));
+  allocated_size_classes_[size_class] = new_mem;
+}
+
+uptr FakeStack::AllocateStack(uptr size, uptr real_stack) {
+  if (!alive_) return real_stack;
+  CHECK(size <= kMaxStackMallocSize && size > 1);
+  uptr size_class = ComputeSizeClass(size);
+  if (!allocated_size_classes_[size_class]) {
+    AllocateOneSizeClass(size_class);
+  }
+  FakeFrame *fake_frame = size_classes_[size_class].FifoPop();
+  CHECK(fake_frame);
+  fake_frame->size_minus_one = size - 1;
+  fake_frame->real_stack = real_stack;
+  while (FakeFrame *top = call_stack_.top()) {
+    if (top->real_stack > real_stack) break;
+    call_stack_.LifoPop();
+    DeallocateFrame(top);
+  }
+  call_stack_.LifoPush(fake_frame);
+  uptr ptr = (uptr)fake_frame;
+  PoisonShadow(ptr, size, 0);
+  return ptr;
+}
+
+void FakeStack::DeallocateFrame(FakeFrame *fake_frame) {
+  CHECK(alive_);
+  uptr size = fake_frame->size_minus_one + 1;
+  uptr size_class = ComputeSizeClass(size);
+  CHECK(allocated_size_classes_[size_class]);
+  uptr ptr = (uptr)fake_frame;
+  CHECK(AddrIsInSizeClass(ptr, size_class));
+  CHECK(AddrIsInSizeClass(ptr + size - 1, size_class));
+  size_classes_[size_class].FifoPush(fake_frame);
+}
+
+void FakeStack::OnFree(uptr ptr, uptr size, uptr real_stack) {
+  FakeFrame *fake_frame = (FakeFrame*)ptr;  // ptr is the fake frame's base.
+  CHECK(fake_frame->magic == kRetiredStackFrameMagic);  // Verify, not assign.
+  CHECK(fake_frame->descr != 0);
+  CHECK(fake_frame->size_minus_one == size - 1);
+  PoisonShadow(ptr, size, kAsanStackAfterReturnMagic);
+}
+
+}  // namespace __asan
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+uptr __asan_stack_malloc(uptr size, uptr real_stack) {
+  if (!flags()->use_fake_stack) return real_stack;
+  AsanThread *t = asanThreadRegistry().GetCurrent();
+  if (!t) {
+    // TSD is gone, use the real stack.
+    return real_stack;
+  }
+  uptr ptr = t->fake_stack().AllocateStack(size, real_stack);
+  // Printf("__asan_stack_malloc %p %zu %p\n", ptr, size, real_stack);
+  return ptr;
+}
+
+void __asan_stack_free(uptr ptr, uptr size, uptr real_stack) {
+  if (!flags()->use_fake_stack) return;
+  if (ptr != real_stack) {
+    FakeStack::OnFree(ptr, size, real_stack);
+  }
+}
diff --git a/lib/asan/asan_intercepted_functions.h b/lib/asan/asan_intercepted_functions.h
index 03f126b..60b05e6 100644
--- a/lib/asan/asan_intercepted_functions.h
+++ b/lib/asan/asan_intercepted_functions.h
@@ -41,8 +41,10 @@
 
 #if defined(__linux__)
 # define ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX 1
+# define ASAN_INTERCEPT_PRCTL 1
 #else
 # define ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX 0
+# define ASAN_INTERCEPT_PRCTL 0
 #endif
 
 #if !defined(__APPLE__)
@@ -51,6 +53,12 @@
 # define ASAN_INTERCEPT_STRNLEN 0
 #endif
 
+#if defined(__linux__) && !defined(ANDROID)
+# define ASAN_INTERCEPT_SWAPCONTEXT 1
+#else
+# define ASAN_INTERCEPT_SWAPCONTEXT 0
+#endif
+
 #if !defined(ANDROID) && !defined(_WIN32)
 # define ASAN_INTERCEPT_SIGNAL_AND_SIGACTION 1
 #else
@@ -199,7 +207,7 @@
 DECLARE_FUNCTION_AND_WRAPPER(CFStringRef, CFStringCreateCopy,
                              CFAllocatorRef alloc, CFStringRef str);
 DECLARE_FUNCTION_AND_WRAPPER(void, free, void* ptr);
-#if MAC_INTERPOSE_FUNCTIONS
+#if MAC_INTERPOSE_FUNCTIONS && !defined(MISSING_BLOCKS_SUPPORT)
 DECLARE_FUNCTION_AND_WRAPPER(void, dispatch_group_async,
                              dispatch_group_t dg,
                              dispatch_queue_t dq, void (^work)(void));
diff --git a/lib/asan/asan_interceptors.cc b/lib/asan/asan_interceptors.cc
index 48dfeb0..2eecede 100644
--- a/lib/asan/asan_interceptors.cc
+++ b/lib/asan/asan_interceptors.cc
@@ -136,6 +136,28 @@
     struct sigaction *oldact);
 #endif  // ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
 
+#if ASAN_INTERCEPT_SWAPCONTEXT
+INTERCEPTOR(int, swapcontext, struct ucontext_t *oucp,
+            struct ucontext_t *ucp) {
+  static bool reported_warning = false;
+  if (!reported_warning) {
+    Report("WARNING: ASan doesn't fully support makecontext/swapcontext "
+           "functions and may produce false positives in some cases!\n");
+    reported_warning = true;
+  }
+  // Clear shadow memory for new context (it may share stack
+  // with current context).
+  ClearShadowMemoryForContext(ucp);
+  int res = REAL(swapcontext)(oucp, ucp);
+  // swapcontext technically does not return, but program may swap context to
+  // "oucp" later, that would look as if swapcontext() returned 0.
+  // We need to clear shadow for ucp once again, as it may be in arbitrary
+  // state.
+  ClearShadowMemoryForContext(ucp);
+  return res;
+}
+#endif
+
 INTERCEPTOR(void, longjmp, void *env, int val) {
   __asan_handle_no_return();
   REAL(longjmp)(env, val);
@@ -155,6 +177,25 @@
 }
 #endif
 
+#if ASAN_INTERCEPT_PRCTL
+#define PR_SET_NAME 15
+INTERCEPTOR(int, prctl, int option,
+            unsigned long arg2, unsigned long arg3,  // NOLINT
+            unsigned long arg4, unsigned long arg5) {  // NOLINT
+  int res = REAL(prctl)(option, arg2, arg3, arg4, arg5);
+  if (option == PR_SET_NAME) {  // Mirror the new name into the thread summary.
+    AsanThread *t = asanThreadRegistry().GetCurrent();
+    if (t) {
+      char buff[17];  // Up to 16 copied bytes plus a forced terminating NUL.
+      internal_strncpy(buff, (char*)arg2, 16);
+      buff[16] = 0;
+      t->summary()->set_name(buff);
+    }
+  }
+  return res;
+}
+#endif
+
 #if ASAN_INTERCEPT___CXA_THROW
 INTERCEPTOR(void, __cxa_throw, void *a, void *b, void *c) {
   CHECK(REAL(__cxa_throw));
@@ -236,8 +277,8 @@
       // See http://llvm.org/bugs/show_bug.cgi?id=11763.
       CHECK_RANGES_OVERLAP("memcpy", to, size, from, size);
     }
-    ASAN_WRITE_RANGE(from, size);
-    ASAN_READ_RANGE(to, size);
+    ASAN_READ_RANGE(from, size);
+    ASAN_WRITE_RANGE(to, size);
   }
 #if MAC_INTERPOSE_FUNCTIONS
   // Interposing of resolver functions is broken on Mac OS 10.7 and 10.8.
@@ -255,8 +296,8 @@
   }
   ENSURE_ASAN_INITED();
   if (flags()->replace_intrin) {
-    ASAN_WRITE_RANGE(from, size);
-    ASAN_READ_RANGE(to, size);
+    ASAN_READ_RANGE(from, size);
+    ASAN_WRITE_RANGE(to, size);
   }
 #if MAC_INTERPOSE_FUNCTIONS
   // Interposing of resolver functions is broken on Mac OS 10.7 and 10.8.
@@ -689,12 +730,18 @@
   ASAN_INTERCEPT_FUNC(sigaction);
   ASAN_INTERCEPT_FUNC(signal);
 #endif
+#if ASAN_INTERCEPT_SWAPCONTEXT
+  ASAN_INTERCEPT_FUNC(swapcontext);
+#endif
 #if ASAN_INTERCEPT__LONGJMP
   ASAN_INTERCEPT_FUNC(_longjmp);
 #endif
 #if ASAN_INTERCEPT_SIGLONGJMP
   ASAN_INTERCEPT_FUNC(siglongjmp);
 #endif
+#if ASAN_INTERCEPT_PRCTL
+  ASAN_INTERCEPT_FUNC(prctl);
+#endif
 
   // Intercept exception handling functions.
 #if ASAN_INTERCEPT___CXA_THROW
diff --git a/lib/asan/asan_internal.h b/lib/asan/asan_internal.h
index f9a6149..468d997 100644
--- a/lib/asan/asan_internal.h
+++ b/lib/asan/asan_internal.h
@@ -116,6 +116,7 @@
 void SetAlternateSignalStack();
 void UnsetAlternateSignalStack();
 void InstallSignalHandlers();
+void ClearShadowMemoryForContext(void *context);
 void AsanPlatformThreadInit();
 
 // Wrapper for TLS/TSD.
@@ -144,6 +145,15 @@
 # define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE true
 #endif  // __APPLE__
 
+// Call an interface function that may be a weak hook only if it is present.
+// do { } while (0) makes each use one statement (safe before an "else").
+#define ASAN_MALLOC_HOOK(ptr, size) \
+  do { if (&__asan_malloc_hook) __asan_malloc_hook(ptr, size); } while (0)
+#define ASAN_FREE_HOOK(ptr) \
+  do { if (&__asan_free_hook) __asan_free_hook(ptr); } while (0)
+#define ASAN_ON_ERROR() \
+  do { if (&__asan_on_error) __asan_on_error(); } while (0)
+
 extern int asan_inited;
 // Used to avoid infinite recursion in __asan_init().
 extern bool asan_init_is_running;
@@ -160,6 +170,7 @@
 const int kAsanStackAfterReturnMagic = 0xf5;
 const int kAsanInitializationOrderMagic = 0xf6;
 const int kAsanUserPoisonedMemoryMagic = 0xf7;
+const int kAsanStackUseAfterScopeMagic = 0xf8;
 const int kAsanGlobalRedzoneMagic = 0xf9;
 const int kAsanInternalHeapMagic = 0xfe;
 
diff --git a/lib/asan/asan_linux.cc b/lib/asan/asan_linux.cc
index 5c52ddc..068f7b1 100644
--- a/lib/asan/asan_linux.cc
+++ b/lib/asan/asan_linux.cc
@@ -68,6 +68,27 @@
   *pc = ucontext->uc_mcontext.gregs[REG_EIP];
   *bp = ucontext->uc_mcontext.gregs[REG_EBP];
   *sp = ucontext->uc_mcontext.gregs[REG_ESP];
+# elif defined(__powerpc__) || defined(__powerpc64__)
+  ucontext_t *ucontext = (ucontext_t*)context;
+  *pc = ucontext->uc_mcontext.regs->nip;
+  *sp = ucontext->uc_mcontext.regs->gpr[PT_R1];
+  // The powerpc{,64}-linux ABIs do not specify r31 as the frame
+  // pointer, but GCC always uses r31 when we need a frame pointer.
+  *bp = ucontext->uc_mcontext.regs->gpr[PT_R31];
+# elif defined(__sparc__)
+  ucontext_t *ucontext = (ucontext_t*)context;
+  uptr *stk_ptr;
+# if defined (__arch64__)
+  *pc = ucontext->uc_mcontext.mc_gregs[MC_PC];
+  *sp = ucontext->uc_mcontext.mc_gregs[MC_O6];
+  stk_ptr = (uptr *) (*sp + 2047);
+  *bp = stk_ptr[15];
+# else
+  *pc = ucontext->uc_mcontext.gregs[REG_PC];
+  *sp = ucontext->uc_mcontext.gregs[REG_O6];
+  stk_ptr = (uptr *) *sp;
+  *bp = stk_ptr[15];
+# endif
 #else
 # error "Unsupported arch"
 #endif
@@ -137,8 +158,12 @@
   stack->trace[0] = pc;
   if ((max_s) > 1) {
     stack->max_size = max_s;
-#ifdef __arm__
+#if defined(__arm__) || \
+    defined(__powerpc__) || defined(__powerpc64__) || \
+    defined(__sparc__)
     _Unwind_Backtrace(Unwind_Trace, stack);
+    // Pop off the two ASAN functions from the backtrace.
+    stack->PopStackFrames(2);
 #else
     if (!asan_inited) return;
     if (AsanThread *t = asanThreadRegistry().GetCurrent())
@@ -147,6 +172,24 @@
   }
 }
 
+#if !ASAN_ANDROID
+// Zeroes the shadow of the page-aligned region covering the stack described
+// by the ucontext_t that "context" points to.
+void ClearShadowMemoryForContext(void *context) {
+  ucontext_t *ctx = (ucontext_t*)context;
+  uptr page = GetPageSizeCached();
+  uptr stack_beg = (uptr)ctx->uc_stack.ss_sp;
+  // Widen the context's stack range to whole pages on both ends.
+  uptr aligned_beg = stack_beg & ~(page - 1);
+  uptr span = ctx->uc_stack.ss_size + (stack_beg - aligned_beg);
+  PoisonShadow(aligned_beg, RoundUpTo(span, page), 0);
+}
+#else
+void ClearShadowMemoryForContext(void *context) {
+  UNIMPLEMENTED();  // Taken when ASAN_ANDROID is set: no implementation.
+}
+#endif
+
 }  // namespace __asan
 
 #endif  // __linux__
diff --git a/lib/asan/asan_mac.cc b/lib/asan/asan_mac.cc
index 3ff8470..7abe9a4 100644
--- a/lib/asan/asan_mac.cc
+++ b/lib/asan/asan_mac.cc
@@ -42,7 +42,7 @@
 
 void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
   ucontext_t *ucontext = (ucontext_t*)context;
-# if __WORDSIZE == 64
+# if SANITIZER_WORDSIZE == 64
   *pc = ucontext->uc_mcontext->__ss.__rip;
   *bp = ucontext->uc_mcontext->__ss.__rbp;
   *sp = ucontext->uc_mcontext->__ss.__rsp;
@@ -50,7 +50,7 @@
   *pc = ucontext->uc_mcontext->__ss.__eip;
   *bp = ucontext->uc_mcontext->__ss.__ebp;
   *sp = ucontext->uc_mcontext->__ss.__esp;
-# endif  // __WORDSIZE
+# endif  // SANITIZER_WORDSIZE
 }
 
 int GetMacosVersion() {
@@ -171,6 +171,10 @@
   }
 }
 
+void ClearShadowMemoryForContext(void *context) {
+  UNIMPLEMENTED();  // Not provided in the Mac-specific file.
+}
+
 // The range of pages to be used for escape islands.
 // TODO(glider): instead of mapping a fixed range we must find a range of
 // unmapped pages in vmmap and take them.
@@ -179,12 +183,12 @@
 // kHighMemBeg or kHighMemEnd.
 static void *island_allocator_pos = 0;
 
-#if __WORDSIZE == 32
-# define kIslandEnd (0xffdf0000 - kPageSize)
-# define kIslandBeg (kIslandEnd - 256 * kPageSize)
+#if SANITIZER_WORDSIZE == 32
+# define kIslandEnd (0xffdf0000 - GetPageSizeCached())
+# define kIslandBeg (kIslandEnd - 256 * GetPageSizeCached())
 #else
-# define kIslandEnd (0x7fffffdf0000 - kPageSize)
-# define kIslandBeg (kIslandEnd - 256 * kPageSize)
+# define kIslandEnd (0x7fffffdf0000 - GetPageSizeCached())
+# define kIslandBeg (kIslandEnd - 256 * GetPageSizeCached())
 #endif
 
 extern "C"
@@ -208,7 +212,7 @@
     internal_memset(island_allocator_pos, 0xCC, kIslandEnd - kIslandBeg);
   };
   *ptr = island_allocator_pos;
-  island_allocator_pos = (char*)island_allocator_pos + kPageSize;
+  island_allocator_pos = (char*)island_allocator_pos + GetPageSizeCached();
   if (flags()->verbosity) {
     Report("Branch island allocated at %p\n", *ptr);
   }
@@ -381,7 +385,7 @@
                                asan_dispatch_call_block_and_release);
 }
 
-#if MAC_INTERPOSE_FUNCTIONS
+#if MAC_INTERPOSE_FUNCTIONS && !defined(MISSING_BLOCKS_SUPPORT)
 // dispatch_async, dispatch_group_async and others tailcall the corresponding
 // dispatch_*_f functions. When wrapping functions with mach_override, those
 // dispatch_*_f are intercepted automatically. But with dylib interposition
diff --git a/lib/asan/asan_malloc_mac.cc b/lib/asan/asan_malloc_mac.cc
index 5b47e12..b32c18e 100644
--- a/lib/asan/asan_malloc_mac.cc
+++ b/lib/asan/asan_malloc_mac.cc
@@ -165,7 +165,7 @@
     return malloc_zone_valloc(system_malloc_zone, size);
   }
   GET_STACK_TRACE_HERE_FOR_MALLOC;
-  return asan_memalign(kPageSize, size, &stack);
+  return asan_memalign(GetPageSizeCached(), size, &stack);
 }
 
 #define GET_ZONE_FOR_PTR(ptr) \
diff --git a/lib/asan/asan_mapping.h b/lib/asan/asan_mapping.h
index 40e3220..3a5f88b 100644
--- a/lib/asan/asan_mapping.h
+++ b/lib/asan/asan_mapping.h
@@ -30,10 +30,14 @@
 #  define SHADOW_OFFSET (0)
 # else
 #  define SHADOW_SCALE (3)
-#  if __WORDSIZE == 32
+#  if SANITIZER_WORDSIZE == 32
 #   define SHADOW_OFFSET (1 << 29)
 #  else
-#   define SHADOW_OFFSET (1ULL << 44)
+#   if defined(__powerpc64__)
+#    define SHADOW_OFFSET (1ULL << 41)
+#   else
+#    define SHADOW_OFFSET (1ULL << 44)
+#   endif
 #  endif
 # endif
 #endif  // ASAN_FLEXIBLE_MAPPING_AND_OFFSET
@@ -42,11 +46,15 @@
 #define MEM_TO_SHADOW(mem) (((mem) >> SHADOW_SCALE) | (SHADOW_OFFSET))
 #define SHADOW_TO_MEM(shadow) (((shadow) - SHADOW_OFFSET) << SHADOW_SCALE)
 
-#if __WORDSIZE == 64
+#if SANITIZER_WORDSIZE == 64
+# if defined(__powerpc64__)
+  static const uptr kHighMemEnd = 0x00000fffffffffffUL;
+# else
   static const uptr kHighMemEnd = 0x00007fffffffffffUL;
-#else  // __WORDSIZE == 32
+# endif
+#else  // SANITIZER_WORDSIZE == 32
   static const uptr kHighMemEnd = 0xffffffff;
-#endif  // __WORDSIZE
+#endif  // SANITIZER_WORDSIZE
 
 
 #define kLowMemBeg      0
@@ -60,7 +68,12 @@
 #define kHighShadowBeg  MEM_TO_SHADOW(kHighMemBeg)
 #define kHighShadowEnd  MEM_TO_SHADOW(kHighMemEnd)
 
-#define kShadowGapBeg   (kLowShadowEnd ? kLowShadowEnd + 1 : 16 * kPageSize)
+// With the zero shadow base we can not actually map pages starting from 0.
+// This constant is somewhat arbitrary.
+#define kZeroBaseShadowStart (1 << 18)
+
+#define kShadowGapBeg   (kLowShadowEnd ? kLowShadowEnd + 1 \
+                                       : kZeroBaseShadowStart)
 #define kShadowGapEnd   (kHighShadowBeg - 1)
 
 #define kGlobalAndStackRedzone \
diff --git a/lib/asan/asan_new_delete.cc b/lib/asan/asan_new_delete.cc
index cfe6ca5..756810d 100644
--- a/lib/asan/asan_new_delete.cc
+++ b/lib/asan/asan_new_delete.cc
@@ -17,7 +17,6 @@
 #include "asan_stack.h"
 
 #include <stddef.h>
-#include <new>
 
 namespace __asan {
 // This function is a no-op. We need it to make sure that object file
@@ -31,34 +30,37 @@
 // On Android new() goes through malloc interceptors.
 #if !ASAN_ANDROID
 
+// Fake std::nothrow_t to avoid including <new>.
+namespace std {
+struct nothrow_t {};  // Tag type for the nothrow operators below.
+}  // namespace std
+
 #define OPERATOR_NEW_BODY \
   GET_STACK_TRACE_HERE_FOR_MALLOC;\
   return asan_memalign(0, size, &stack);
 
 INTERCEPTOR_ATTRIBUTE
-void *operator new(size_t size) throw(std::bad_alloc) { OPERATOR_NEW_BODY; }
+void *operator new(size_t size) { OPERATOR_NEW_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void *operator new[](size_t size) throw(std::bad_alloc) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size) { OPERATOR_NEW_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void *operator new(size_t size, std::nothrow_t const&) throw()
-{ OPERATOR_NEW_BODY; }
+void *operator new(size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void *operator new[](size_t size, std::nothrow_t const&) throw()
-{ OPERATOR_NEW_BODY; }
+void *operator new[](size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
 
 #define OPERATOR_DELETE_BODY \
   GET_STACK_TRACE_HERE_FOR_FREE(ptr);\
   asan_free(ptr, &stack);
 
 INTERCEPTOR_ATTRIBUTE
-void operator delete(void *ptr) throw() { OPERATOR_DELETE_BODY; }
+void operator delete(void *ptr) { OPERATOR_DELETE_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void operator delete[](void *ptr) throw() { OPERATOR_DELETE_BODY; }
+void operator delete[](void *ptr) { OPERATOR_DELETE_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void operator delete(void *ptr, std::nothrow_t const&) throw()
+void operator delete(void *ptr, std::nothrow_t const&)
 { OPERATOR_DELETE_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void operator delete[](void *ptr, std::nothrow_t const&) throw()
+void operator delete[](void *ptr, std::nothrow_t const&)
 { OPERATOR_DELETE_BODY; }
 
 #endif
diff --git a/lib/asan/asan_poisoning.cc b/lib/asan/asan_poisoning.cc
index ce9de92..dcdc7f8 100644
--- a/lib/asan/asan_poisoning.cc
+++ b/lib/asan/asan_poisoning.cc
@@ -151,3 +151,40 @@
 bool __asan_address_is_poisoned(void const volatile *addr) {
   return __asan::AddressIsPoisoned((uptr)addr);
 }
+
+// This is a simplified version of __asan_(un)poison_memory_region, which
+// assumes that the left border of the region is properly aligned.
+static void PoisonAlignedStackMemory(uptr addr, uptr size, bool do_poison) {
+  if (size == 0) return;
+  uptr aligned_size = size & ~(SHADOW_GRANULARITY - 1);  // Whole granules.
+  PoisonShadow(addr, aligned_size,
+               do_poison ? kAsanStackUseAfterScopeMagic : 0);
+  if (size == aligned_size)
+    return;
+  s8 end_offset = (s8)(size - aligned_size);  // Bytes in the last granule.
+  s8* shadow_end = (s8*)MemToShadow(addr + aligned_size);
+  s8 end_value = *shadow_end;
+  if (do_poison) {
+    // If possible, mark all the bytes mapping to the last shadow byte as
+    // unaddressable.
+    if (end_value > 0 && end_value <= end_offset)
+      *shadow_end = kAsanStackUseAfterScopeMagic;
+  } else {
+    // If necessary, mark the first few bytes mapping to the last shadow byte
+    // as addressable.
+    if (end_value != 0)
+      *shadow_end = Max(end_value, end_offset);
+  }
+}
+
+void __asan_poison_stack_memory(uptr addr, uptr size) {
+  const bool verbose = flags()->verbosity > 0;  // Log only when asked to.
+  if (verbose) Report("poisoning: %p %zx\n", (void*)addr, size);
+  PoisonAlignedStackMemory(addr, size, /*do_poison=*/true);
+}
+
+void __asan_unpoison_stack_memory(uptr addr, uptr size) {
+  const bool verbose = flags()->verbosity > 0;  // Log only when asked to.
+  if (verbose) Report("unpoisoning: %p %zx\n", (void*)addr, size);
+  PoisonAlignedStackMemory(addr, size, /*do_poison=*/false);
+}
diff --git a/lib/asan/asan_report.cc b/lib/asan/asan_report.cc
index 2fbf8fd..558e4f2 100644
--- a/lib/asan/asan_report.cc
+++ b/lib/asan/asan_report.cc
@@ -44,7 +44,7 @@
 
 static void PrintBytes(const char *before, uptr *a) {
   u8 *bytes = (u8*)a;
-  uptr byte_num = (__WORDSIZE) / 8;
+  uptr byte_num = (SANITIZER_WORDSIZE) / 8;
   Printf("%s%p:", before, (void*)a);
   for (uptr i = 0; i < byte_num; i++) {
     Printf(" %x%x", bytes[i] >> 4, bytes[i] & 15);
@@ -180,7 +180,7 @@
     Printf("    [%zu, %zu) '%s'\n", beg, beg + size, buf);
   }
   Printf("HINT: this may be a false positive if your program uses "
-             "some custom stack unwind mechanism\n"
+             "some custom stack unwind mechanism or swapcontext\n"
              "      (longjmp and C++ exceptions *are* supported)\n");
   DescribeThread(t->summary());
   return true;
@@ -203,6 +203,25 @@
          (void*)(chunk.Beg()), (void*)(chunk.End()));
 }
 
+// Returns " (name)" built in buff, or "" if name is empty or buff_len < 4.
+const char *ThreadNameWithParenthesis(AsanThreadSummary *t, char buff[],
+                                      uptr buff_len) {
+  const char *name = t->name();
+  if (*name == 0 || buff_len < 4) return "";  // "buff_len - 4" must not wrap.
+  buff[0] = 0;
+  internal_strncat(buff, " (", 3);
+  internal_strncat(buff, name, buff_len - 4);  // Room for " (", ")" and NUL.
+  internal_strncat(buff, ")", 2);
+  return buff;
+}
+
+const char *ThreadNameWithParenthesis(u32 tid, char buff[],
+                                      uptr buff_len) {
+  if (tid == kInvalidTid) return "";  // No thread to look up.
+  // NOTE(review): assumes FindByTid() never returns null here -- confirm.
+  AsanThreadSummary *t = asanThreadRegistry().FindByTid(tid);
+  return ThreadNameWithParenthesis(t, buff, buff_len);
+}
+
 void DescribeHeapAddress(uptr addr, uptr access_size) {
   AsanChunkView chunk = FindHeapChunkByAddress(addr);
   if (!chunk.IsValid()) return;
@@ -214,20 +233,25 @@
   chunk.GetAllocStack(&alloc_stack);
   AsanThread *t = asanThreadRegistry().GetCurrent();
   CHECK(t);
+  char tname[128];
   if (chunk.FreeTid() != kInvalidTid) {
     AsanThreadSummary *free_thread =
         asanThreadRegistry().FindByTid(chunk.FreeTid());
-    Printf("freed by thread T%d here:\n", free_thread->tid());
+    Printf("freed by thread T%d%s here:\n", free_thread->tid(),
+           ThreadNameWithParenthesis(free_thread, tname, sizeof(tname)));
     StackTrace free_stack;
     chunk.GetFreeStack(&free_stack);
     PrintStack(&free_stack);
-    Printf("previously allocated by thread T%d here:\n", alloc_thread->tid());
+    Printf("previously allocated by thread T%d%s here:\n",
+           alloc_thread->tid(),
+           ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)));
     PrintStack(&alloc_stack);
     DescribeThread(t->summary());
     DescribeThread(free_thread);
     DescribeThread(alloc_thread);
   } else {
-    Printf("allocated by thread T%d here:\n", alloc_thread->tid());
+    Printf("allocated by thread T%d%s here:\n", alloc_thread->tid(),
+           ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)));
     PrintStack(&alloc_stack);
     DescribeThread(t->summary());
     DescribeThread(alloc_thread);
@@ -256,8 +280,13 @@
     return;
   }
   summary->set_announced(true);
-  Printf("Thread T%d created by T%d here:\n",
-         summary->tid(), summary->parent_tid());
+  char tname[128];
+  Printf("Thread T%d%s", summary->tid(),
+         ThreadNameWithParenthesis(summary->tid(), tname, sizeof(tname)));
+  Printf(" created by T%d%s here:\n",
+         summary->parent_tid(),
+         ThreadNameWithParenthesis(summary->parent_tid(),
+                                   tname, sizeof(tname)));
   PrintStack(summary->stack());
   // Recursively described parent thread if needed.
   if (flags()->print_full_thread_history) {
@@ -289,9 +318,11 @@
         // an error report will finish doing it.
         SleepForSeconds(Max(100, flags()->sleep_before_dying + 1));
       }
-      Die();
+      // If we're still not dead for some reason, use raw Exit() instead of
+      // Die() to bypass any additional checks.
+      Exit(flags()->exitcode);
     }
-    __asan_on_error();
+    ASAN_ON_ERROR();
     reporting_thread_tid = asanThreadRegistry().GetCurrentTidOrInvalid();
     Printf("===================================================="
            "=============\n");
@@ -455,6 +486,9 @@
       case kAsanUserPoisonedMemoryMagic:
         bug_descr = "use-after-poison";
         break;
+      case kAsanStackUseAfterScopeMagic:
+        bug_descr = "stack-use-after-scope";
+        break;
       case kAsanGlobalRedzoneMagic:
         bug_descr = "global-buffer-overflow";
         break;
@@ -466,9 +500,11 @@
              bug_descr, (void*)addr, pc, bp, sp);
 
   u32 curr_tid = asanThreadRegistry().GetCurrentTidOrInvalid();
-  Printf("%s of size %zu at %p thread T%d\n",
+  char tname[128];
+  Printf("%s of size %zu at %p thread T%d%s\n",
              access_size ? (is_write ? "WRITE" : "READ") : "ACCESS",
-             access_size, (void*)addr, curr_tid);
+             access_size, (void*)addr, curr_tid,
+             ThreadNameWithParenthesis(curr_tid, tname, sizeof(tname)));
 
   GET_STACK_TRACE_WITH_PC_AND_BP(kStackTraceMax, pc, bp);
   PrintStack(&stack);
@@ -488,7 +524,9 @@
   }
 }
 
+#if !SANITIZER_SUPPORTS_WEAK_HOOKS
 // Provide default implementation of __asan_on_error that does nothing
 // and may be overriden by user.
 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE NOINLINE
 void __asan_on_error() {}
+#endif
diff --git a/lib/asan/asan_rtl.cc b/lib/asan/asan_rtl.cc
index 14667e6..37c9583 100644
--- a/lib/asan/asan_rtl.cc
+++ b/lib/asan/asan_rtl.cc
@@ -66,6 +66,10 @@
   return &asan_flags;
 }
 
+static const char *MaybeCallAsanDefaultOptions() {
+  return (&__asan_default_options) ? __asan_default_options() : "";  // NOLINT: "" when the hook's address is null.
+}
+
 static void ParseFlagsFromString(Flags *f, const char *str) {
   ParseFlag(str, &f->quarantine_size, "quarantine_size");
   ParseFlag(str, &f->symbolize, "symbolize");
@@ -102,12 +106,6 @@
   ParseFlag(str, &f->log_path, "log_path");
 }
 
-extern "C" {
-SANITIZER_WEAK_ATTRIBUTE
-SANITIZER_INTERFACE_ATTRIBUTE
-const char* __asan_default_options() { return ""; }
-}  // extern "C"
-
 void InitializeFlags(Flags *f, const char *env) {
   internal_memset(f, 0, sizeof(*f));
 
@@ -134,17 +132,17 @@
   f->unmap_shadow_on_exit = false;
   f->abort_on_error = false;
   f->atexit = false;
-  f->disable_core = (__WORDSIZE == 64);
+  f->disable_core = (SANITIZER_WORDSIZE == 64);
   f->strip_path_prefix = "";
   f->allow_reexec = true;
   f->print_full_thread_history = true;
   f->log_path = 0;
 
   // Override from user-specified string.
-  ParseFlagsFromString(f, __asan_default_options());
+  ParseFlagsFromString(f, MaybeCallAsanDefaultOptions());
   if (flags()->verbosity) {
     Report("Using the defaults from __asan_default_options: %s\n",
-           __asan_default_options());
+           MaybeCallAsanDefaultOptions());
   }
 
   // Override from command line.
@@ -165,8 +163,8 @@
 // ---------------------- mmap -------------------- {{{1
 // Reserve memory range [beg, end].
 static void ReserveShadowMemoryRange(uptr beg, uptr end) {
-  CHECK((beg % kPageSize) == 0);
-  CHECK(((end + 1) % kPageSize) == 0);
+  CHECK((beg % GetPageSizeCached()) == 0);
+  CHECK(((end + 1) % GetPageSizeCached()) == 0);
   uptr size = end - beg + 1;
   void *res = MmapFixedNoReserve(beg, size);
   if (res != (void*)beg) {
@@ -241,13 +239,10 @@
     case 27: __asan_set_error_exit_code(0); break;
     case 28: __asan_stack_free(0, 0, 0); break;
     case 29: __asan_stack_malloc(0, 0); break;
-    case 30: __asan_on_error(); break;
-    case 31: __asan_default_options(); break;
-    case 32: __asan_before_dynamic_init(0, 0); break;
-    case 33: __asan_after_dynamic_init(); break;
-    case 34: __asan_malloc_hook(0, 0); break;
-    case 35: __asan_free_hook(0); break;
-    case 36: __asan_symbolize(0, 0, 0); break;
+    case 30: __asan_before_dynamic_init(0, 0); break;
+    case 31: __asan_after_dynamic_init(); break;
+    case 32: __asan_poison_stack_memory(0, 0); break;
+    case 33: __asan_unpoison_stack_memory(0, 0); break;
   }
 }
 
@@ -261,6 +256,13 @@
 // ---------------------- Interface ---------------- {{{1
 using namespace __asan;  // NOLINT
 
+#if !SANITIZER_SUPPORTS_WEAK_HOOKS
+extern "C" {
+SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
+const char* __asan_default_options() { return ""; }  // No extra defaults.
+}  // extern "C"
+#endif
+
 int NOINLINE __asan_set_error_exit_code(int exit_code) {
   int old = flags()->exitcode;
   flags()->exitcode = exit_code;
@@ -271,8 +273,9 @@
   int local_stack;
   AsanThread *curr_thread = asanThreadRegistry().GetCurrent();
   CHECK(curr_thread);
+  uptr PageSize = GetPageSizeCached();
   uptr top = curr_thread->stack_top();
-  uptr bottom = ((uptr)&local_stack - kPageSize) & ~(kPageSize-1);
+  uptr bottom = ((uptr)&local_stack - PageSize) & ~(PageSize-1);
   PoisonShadow(bottom, top - bottom, 0);
 }
 
@@ -349,12 +352,13 @@
   }
 
   uptr shadow_start = kLowShadowBeg;
-  if (kLowShadowBeg > 0) shadow_start -= kMmapGranularity;
+  if (kLowShadowBeg > 0) shadow_start -= GetMmapGranularity();
   uptr shadow_end = kHighShadowEnd;
   if (MemoryRangeIsAvailable(shadow_start, shadow_end)) {
     if (kLowShadowBeg != kLowShadowEnd) {
       // mmap the low shadow plus at least one page.
-      ReserveShadowMemoryRange(kLowShadowBeg - kMmapGranularity, kLowShadowEnd);
+      ReserveShadowMemoryRange(kLowShadowBeg - GetMmapGranularity(),
+                               kLowShadowEnd);
     }
     // mmap the high shadow.
     ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd);
diff --git a/lib/asan/asan_stack.cc b/lib/asan/asan_stack.cc
index 53a4647..ebf22fd 100644
--- a/lib/asan/asan_stack.cc
+++ b/lib/asan/asan_stack.cc
@@ -17,9 +17,15 @@
 
 namespace __asan {
 
+static bool MaybeCallAsanSymbolize(const void *pc, char *out_buffer,
+                                   int out_size) {
+  if (!&__asan_symbolize) return false;  // Hook's address is null.
+  return __asan_symbolize(pc, out_buffer, out_size);
+}
+
 void PrintStack(StackTrace *stack) {
   stack->PrintStack(stack->trace, stack->size, flags()->symbolize,
-                    flags()->strip_path_prefix, __asan_symbolize);
+                    flags()->strip_path_prefix, MaybeCallAsanSymbolize);
 }
 
 }  // namespace __asan
@@ -29,7 +35,7 @@
 // Provide default implementation of __asan_symbolize that does nothing
 // and may be overriden by user if he wants to use his own symbolization.
 // ASan on Windows has its own implementation of this.
-#ifndef _WIN32
+#if !defined(_WIN32) && !SANITIZER_SUPPORTS_WEAK_HOOKS
 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE NOINLINE
 bool __asan_symbolize(const void *pc, char *out_buffer, int out_size) {
   return false;
diff --git a/lib/asan/asan_stats.cc b/lib/asan/asan_stats.cc
index 189315b..cf8cadf 100644
--- a/lib/asan/asan_stats.cc
+++ b/lib/asan/asan_stats.cc
@@ -43,7 +43,7 @@
   Printf("Stats: %zuM really freed by %zu calls\n",
              really_freed>>20, real_frees);
   Printf("Stats: %zuM (%zu full pages) mmaped in %zu calls\n",
-             mmaped>>20, mmaped / kPageSize, mmaps);
+             mmaped>>20, mmaped / GetPageSizeCached(), mmaps);
 
   PrintMallocStatsArray("  mmaps   by size class: ", mmaped_by_size);
   PrintMallocStatsArray("  mallocs by size class: ", malloced_by_size);
@@ -56,7 +56,8 @@
 static AsanLock print_lock(LINKER_INITIALIZED);
 
 static void PrintAccumulatedStats() {
-  AsanStats stats = asanThreadRegistry().GetAccumulatedStats();
+  AsanStats stats;
+  asanThreadRegistry().GetAccumulatedStats(&stats);
   // Use lock to keep reports from mixing up.
   ScopedLock lock(&print_lock);
   stats.Print();
diff --git a/lib/asan/asan_thread.cc b/lib/asan/asan_thread.cc
index bdb5022..a77e435 100644
--- a/lib/asan/asan_thread.cc
+++ b/lib/asan/asan_thread.cc
@@ -28,15 +28,16 @@
 
 AsanThread *AsanThread::Create(u32 parent_tid, thread_callback_t start_routine,
                                void *arg, StackTrace *stack) {
-  uptr size = RoundUpTo(sizeof(AsanThread), kPageSize);
+  uptr PageSize = GetPageSizeCached();
+  uptr size = RoundUpTo(sizeof(AsanThread), PageSize);
   AsanThread *thread = (AsanThread*)MmapOrDie(size, __FUNCTION__);
   thread->start_routine_ = start_routine;
   thread->arg_ = arg;
 
-  const uptr kSummaryAllocSize = kPageSize;
+  const uptr kSummaryAllocSize = PageSize;
   CHECK_LE(sizeof(AsanThreadSummary), kSummaryAllocSize);
   AsanThreadSummary *summary =
-      (AsanThreadSummary*)MmapOrDie(kPageSize, "AsanThreadSummary");
+      (AsanThreadSummary*)MmapOrDie(PageSize, "AsanThreadSummary");
   summary->Init(parent_tid, stack);
   summary->set_thread(thread);
   thread->set_summary(summary);
@@ -66,7 +67,7 @@
   // and we don't want it to have any poisoned stack.
   ClearShadowForThreadStack();
   fake_stack().Cleanup();
-  uptr size = RoundUpTo(sizeof(AsanThread), kPageSize);
+  uptr size = RoundUpTo(sizeof(AsanThread), GetPageSizeCached());
   UnmapOrDie(this, size);
 }
 
@@ -118,25 +119,25 @@
 
 const char *AsanThread::GetFrameNameByAddr(uptr addr, uptr *offset) {
   uptr bottom = 0;
-  bool is_fake_stack = false;
   if (AddrIsInStack(addr)) {
     bottom = stack_bottom();
   } else {
     bottom = fake_stack().AddrIsInFakeStack(addr);
     CHECK(bottom);
-    is_fake_stack = true;
+    *offset = addr - bottom;
+    return  (const char *)((uptr*)bottom)[1];
   }
-  uptr aligned_addr = addr & ~(__WORDSIZE/8 - 1);  // align addr.
+  uptr aligned_addr = addr & ~(SANITIZER_WORDSIZE/8 - 1);  // align addr.
   u8 *shadow_ptr = (u8*)MemToShadow(aligned_addr);
   u8 *shadow_bottom = (u8*)MemToShadow(bottom);
 
   while (shadow_ptr >= shadow_bottom &&
-      *shadow_ptr != kAsanStackLeftRedzoneMagic) {
+         *shadow_ptr != kAsanStackLeftRedzoneMagic) {
     shadow_ptr--;
   }
 
   while (shadow_ptr >= shadow_bottom &&
-      *shadow_ptr == kAsanStackLeftRedzoneMagic) {
+         *shadow_ptr == kAsanStackLeftRedzoneMagic) {
     shadow_ptr--;
   }
 
@@ -146,8 +147,7 @@
   }
 
   uptr* ptr = (uptr*)SHADOW_TO_MEM((uptr)(shadow_ptr + 1));
-  CHECK((ptr[0] == kCurrentStackFrameMagic) ||
-      (is_fake_stack && ptr[0] == kRetiredStackFrameMagic));
+  CHECK(ptr[0] == kCurrentStackFrameMagic);
   *offset = addr - (uptr)ptr;
   return (const char*)ptr[1];
 }
diff --git a/lib/asan/asan_thread.h b/lib/asan/asan_thread.h
index 4d4c439..acc27e5 100644
--- a/lib/asan/asan_thread.h
+++ b/lib/asan/asan_thread.h
@@ -39,6 +39,7 @@
       internal_memcpy(&stack_, stack, sizeof(*stack));
     }
     thread_ = 0;
+    name_[0] = 0;
   }
   u32 tid() { return tid_; }
   void set_tid(u32 tid) { tid_ = tid; }
@@ -49,6 +50,10 @@
   AsanThread *thread() { return thread_; }
   void set_thread(AsanThread *thread) { thread_ = thread; }
   static void TSDDtor(void *tsd);
+  void set_name(const char *name) {
+    internal_strncpy(name_, name, sizeof(name_) - 1);  // Last byte untouched.
+  }
+  const char *name() { return name_; }  // Empty after Init() sets name_[0].
 
  private:
   u32 tid_;
@@ -56,8 +61,12 @@
   bool announced_;
   StackTrace stack_;
   AsanThread *thread_;
+  char name_[128];
 };
 
+// AsanThreadSummary objects are never freed, so we need many of them.
+COMPILER_CHECK(sizeof(AsanThreadSummary) <= 4094);
+
 // AsanThread are stored in TSD and destroyed when the thread dies.
 class AsanThread {
  public:
diff --git a/lib/asan/asan_thread_registry.cc b/lib/asan/asan_thread_registry.cc
index 4bf63cd..0e07e19 100644
--- a/lib/asan/asan_thread_registry.cc
+++ b/lib/asan/asan_thread_registry.cc
@@ -104,10 +104,10 @@
   return (t) ? t->stats() : main_thread_.stats();
 }
 
-AsanStats AsanThreadRegistry::GetAccumulatedStats() {
+void AsanThreadRegistry::GetAccumulatedStats(AsanStats *stats) {
   ScopedLock lock(&mu_);
   UpdateAccumulatedStatsUnlocked();
-  return accumulated_stats_;
+  internal_memcpy(stats, &accumulated_stats_, sizeof(accumulated_stats_));
 }
 
 uptr AsanThreadRegistry::GetCurrentAllocatedBytes() {
diff --git a/lib/asan/asan_thread_registry.h b/lib/asan/asan_thread_registry.h
index 3ad55f5..2056e73 100644
--- a/lib/asan/asan_thread_registry.h
+++ b/lib/asan/asan_thread_registry.h
@@ -47,9 +47,9 @@
   // Returns stats for GetCurrent(), or stats for
   // T0 if GetCurrent() returns 0.
   AsanStats &GetCurrentThreadStats();
-  // Flushes all thread-local stats to accumulated stats, and returns
+  // Flushes all thread-local stats to accumulated stats, and makes
   // a copy of accumulated stats.
-  AsanStats GetAccumulatedStats();
+  void GetAccumulatedStats(AsanStats *stats);
   uptr GetCurrentAllocatedBytes();
   uptr GetHeapSize();
   uptr GetFreeBytes();
diff --git a/lib/asan/asan_win.cc b/lib/asan/asan_win.cc
index f04c73c..e620c71 100644
--- a/lib/asan/asan_win.cc
+++ b/lib/asan/asan_win.cc
@@ -139,6 +139,10 @@
   // Nothing here for now.
 }
 
+void ClearShadowMemoryForContext(void *context) {
+  UNIMPLEMENTED();  // Not provided in the Windows-specific file.
+}
+
 }  // namespace __asan
 
 // ---------------------- Interface ---------------- {{{1
diff --git a/lib/asan/dynamic/asan_interceptors_dynamic.cc b/lib/asan/dynamic/asan_interceptors_dynamic.cc
index df625b0..4f0f7bd 100644
--- a/lib/asan/dynamic/asan_interceptors_dynamic.cc
+++ b/lib/asan/dynamic/asan_interceptors_dynamic.cc
@@ -91,13 +91,13 @@
   INTERPOSE_FUNCTION(dispatch_after_f),
   INTERPOSE_FUNCTION(dispatch_barrier_async_f),
   INTERPOSE_FUNCTION(dispatch_group_async_f),
-
+#ifndef MISSING_BLOCKS_SUPPORT
   INTERPOSE_FUNCTION(dispatch_group_async),
   INTERPOSE_FUNCTION(dispatch_async),
   INTERPOSE_FUNCTION(dispatch_after),
   INTERPOSE_FUNCTION(dispatch_source_set_event_handler),
   INTERPOSE_FUNCTION(dispatch_source_set_cancel_handler),
-
+#endif
   INTERPOSE_FUNCTION(signal),
   INTERPOSE_FUNCTION(sigaction),
 
diff --git a/lib/asan/lit_tests/Helpers/initialization-blacklist-extra.cc b/lib/asan/lit_tests/Helpers/initialization-blacklist-extra.cc
new file mode 100644
index 0000000..09aed21
--- /dev/null
+++ b/lib/asan/lit_tests/Helpers/initialization-blacklist-extra.cc
@@ -0,0 +1,15 @@
+int zero_init() { return 0; }
+int badGlobal = zero_init();  // Global initialized by a function call.
+int readBadGlobal() { return badGlobal; }
+
+namespace badNamespace {
+class BadClass {
+ public:
+  BadClass() { value = 0; }  // User-provided constructor.
+  int value;
+};
+// Global object with non-trivial constructor.
+BadClass bad_object;
+}  // namespace badNamespace
+
+int accessBadObject() { return badNamespace::bad_object.value; }  // Reads the global object.
diff --git a/lib/asan/lit_tests/Helpers/initialization-blacklist.txt b/lib/asan/lit_tests/Helpers/initialization-blacklist.txt
new file mode 100644
index 0000000..c5f6610
--- /dev/null
+++ b/lib/asan/lit_tests/Helpers/initialization-blacklist.txt
@@ -0,0 +1,2 @@
+global-init:*badGlobal*
+global-init-type:*badNamespace::BadClass*
diff --git a/lib/asan/lit_tests/Linux/initialization-bug-any-order.cc b/lib/asan/lit_tests/Linux/initialization-bug-any-order.cc
index 6405d3c..c43b1f5 100644
--- a/lib/asan/lit_tests/Linux/initialization-bug-any-order.cc
+++ b/lib/asan/lit_tests/Linux/initialization-bug-any-order.cc
@@ -3,10 +3,10 @@
 // independently on order in which we list source files.
 
 // RUN: %clangxx_asan -m64 -O0 %s %p/../Helpers/initialization-bug-extra.cc\
-// RUN:   -mllvm -asan-initialization-order -o %t && %t 2>&1 \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1 \
 // RUN:    | %symbolize | FileCheck %s
 // RUN: %clangxx_asan -m64 -O0 %p/../Helpers/initialization-bug-extra.cc %s\
-// RUN:   -mllvm -asan-initialization-order -o %t && %t 2>&1 \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1 \
 // RUN:    | %symbolize | FileCheck %s
 
 // Do not test with optimization -- the error may be optimized away.
diff --git a/lib/asan/lit_tests/Linux/swapcontext_test.cc b/lib/asan/lit_tests/Linux/swapcontext_test.cc
new file mode 100644
index 0000000..0404b4f
--- /dev/null
+++ b/lib/asan/lit_tests/Linux/swapcontext_test.cc
@@ -0,0 +1,66 @@
+// Check that ASan plays well with easy cases of makecontext/swapcontext.
+
+// RUN: %clangxx_asan -m64 -O0 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -m64 -O1 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -m64 -O2 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -m64 -O3 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -m32 -O0 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -m32 -O1 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -m32 -O2 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -m32 -O3 %s -o %t && %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+ucontext_t orig_context;
+ucontext_t child_context;
+
+void Child(int mode) {
+  char x[32] = {0};  // Stack gets poisoned.
+  printf("Child: %p\n", x);
+  // (a) Do nothing, just return to parent function.
+  // (b) Jump into the original function. Stack remains poisoned unless we do
+  //     something.
+  if (mode == 1) {
+    if (swapcontext(&child_context, &orig_context) < 0) {
+      perror("swapcontext");
+      _exit(0);
+    }
+  }
+}
+
+int Run(int arg, int mode) {
+  const int kStackSize = 1 << 20;
+  char child_stack[kStackSize + 1];
+  printf("Child stack: %p\n", child_stack);
+  // Setup child context.
+  getcontext(&child_context);
+  child_context.uc_stack.ss_sp = child_stack;
+  child_context.uc_stack.ss_size = kStackSize / 2;
+  if (mode == 0) {
+    child_context.uc_link = &orig_context;
+  }
+  makecontext(&child_context, (void (*)())Child, 1, mode);
+  if (swapcontext(&orig_context, &child_context) < 0) {
+    perror("swapcontext");
+    return 0;
+  }
+  // Touch child's stack to make sure it's unpoisoned.
+  for (int i = 0; i < kStackSize; i++) {
+    child_stack[i] = i;
+  }
+  return child_stack[arg];
+}
+
+int main(int argc, char **argv) {
+  // CHECK: WARNING: ASan doesn't fully support makecontext/swapcontext
+  int ret = 0;
+  ret += Run(argc - 1, 0);
+  printf("Test1 passed\n");
+  // CHECK: Test1 passed
+  ret += Run(argc - 1, 1);
+  printf("Test2 passed\n");
+  // CHECK: Test2 passed
+  return ret;
+}
diff --git a/lib/asan/lit_tests/blacklist.cc b/lib/asan/lit_tests/blacklist.cc
index ef70956..6cfc150 100644
--- a/lib/asan/lit_tests/blacklist.cc
+++ b/lib/asan/lit_tests/blacklist.cc
@@ -3,21 +3,21 @@
 // RUN: echo "fun:*brokenFunction*" > %tmp
 // RUN: echo "global:*badGlobal*" >> %tmp
 // RUN: echo "src:*blacklist-extra.cc" >> %tmp
-// RUN: %clangxx_asan -mllvm -asan-blacklist=%tmp -m64 -O0 %s -o %t \
+// RUN: %clangxx_asan -fsanitize-blacklist=%tmp -m64 -O0 %s -o %t \
 // RUN: %p/Helpers/blacklist-extra.cc && %t 2>&1
-// RUN: %clangxx_asan -mllvm -asan-blacklist=%tmp -m64 -O1 %s -o %t \
+// RUN: %clangxx_asan -fsanitize-blacklist=%tmp -m64 -O1 %s -o %t \
 // RUN: %p/Helpers/blacklist-extra.cc && %t 2>&1
-// RUN: %clangxx_asan -mllvm -asan-blacklist=%tmp -m64 -O2 %s -o %t \
+// RUN: %clangxx_asan -fsanitize-blacklist=%tmp -m64 -O2 %s -o %t \
 // RUN: %p/Helpers/blacklist-extra.cc && %t 2>&1
-// RUN: %clangxx_asan -mllvm -asan-blacklist=%tmp -m64 -O3 %s -o %t \
+// RUN: %clangxx_asan -fsanitize-blacklist=%tmp -m64 -O3 %s -o %t \
 // RUN: %p/Helpers/blacklist-extra.cc && %t 2>&1
-// RUN: %clangxx_asan -mllvm -asan-blacklist=%tmp -m32 -O0 %s -o %t \
+// RUN: %clangxx_asan -fsanitize-blacklist=%tmp -m32 -O0 %s -o %t \
 // RUN: %p/Helpers/blacklist-extra.cc && %t 2>&1
-// RUN: %clangxx_asan -mllvm -asan-blacklist=%tmp -m32 -O1 %s -o %t \
+// RUN: %clangxx_asan -fsanitize-blacklist=%tmp -m32 -O1 %s -o %t \
 // RUN: %p/Helpers/blacklist-extra.cc && %t 2>&1
-// RUN: %clangxx_asan -mllvm -asan-blacklist=%tmp -m32 -O2 %s -o %t \
+// RUN: %clangxx_asan -fsanitize-blacklist=%tmp -m32 -O2 %s -o %t \
 // RUN: %p/Helpers/blacklist-extra.cc && %t 2>&1
-// RUN: %clangxx_asan -mllvm -asan-blacklist=%tmp -m32 -O3 %s -o %t \
+// RUN: %clangxx_asan -fsanitize-blacklist=%tmp -m32 -O3 %s -o %t \
 // RUN: %p/Helpers/blacklist-extra.cc && %t 2>&1
 
 // badGlobal is accessed improperly, but we blacklisted it.
diff --git a/lib/asan/lit_tests/initialization-blacklist.cc b/lib/asan/lit_tests/initialization-blacklist.cc
new file mode 100644
index 0000000..f8df24c
--- /dev/null
+++ b/lib/asan/lit_tests/initialization-blacklist.cc
@@ -0,0 +1,32 @@
+// Test for blacklist functionality of initialization-order checker.
+
+// RUN: %clangxx_asan -m64 -O0 %s %p/Helpers/initialization-blacklist-extra.cc\
+// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1
+// RUN: %clangxx_asan -m64 -O1 %s %p/Helpers/initialization-blacklist-extra.cc\
+// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1
+// RUN: %clangxx_asan -m64 -O2 %s %p/Helpers/initialization-blacklist-extra.cc\
+// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1
+// RUN: %clangxx_asan -m32 -O0 %s %p/Helpers/initialization-blacklist-extra.cc\
+// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1
+// RUN: %clangxx_asan -m32 -O1 %s %p/Helpers/initialization-blacklist-extra.cc\
+// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1
+// RUN: %clangxx_asan -m32 -O2 %s %p/Helpers/initialization-blacklist-extra.cc\
+// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1
+
+// Function is defined in another TU.
+int readBadGlobal();
+int x = readBadGlobal();  // init-order bug.
+
+// Function is defined in another TU.
+int accessBadObject();
+int y = accessBadObject();  // init-order bug.
+
+int main(int argc, char **argv) {
+  return argc + x + y - 1;
+}
diff --git a/lib/asan/lit_tests/initialization-bug.cc b/lib/asan/lit_tests/initialization-bug.cc
index 90e4db9..8f4e33e 100644
--- a/lib/asan/lit_tests/initialization-bug.cc
+++ b/lib/asan/lit_tests/initialization-bug.cc
@@ -1,10 +1,10 @@
 // Test to make sure basic initialization order errors are caught.
 
 // RUN: %clangxx_asan -m64 -O0 %s %p/Helpers/initialization-bug-extra2.cc\
-// RUN:   -mllvm -asan-initialization-order -o %t && %t 2>&1 \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1 \
 // RUN:    | %symbolize | FileCheck %s
 // RUN: %clangxx_asan -m32 -O0 %s %p/Helpers/initialization-bug-extra2.cc\
-// RUN:   -mllvm -asan-initialization-order -o %t && %t 2>&1 \
+// RUN:   -fsanitize=init-order -o %t && %t 2>&1 \
 // RUN:     | %symbolize | FileCheck %s
 
 // Do not test with optimization -- the error may be optimized away.
diff --git a/lib/asan/lit_tests/initialization-nobug.cc b/lib/asan/lit_tests/initialization-nobug.cc
index cd68b13..1b89616 100644
--- a/lib/asan/lit_tests/initialization-nobug.cc
+++ b/lib/asan/lit_tests/initialization-nobug.cc
@@ -2,23 +2,23 @@
 // order checking.  If successful, this will just return 0.
 
 // RUN: %clangxx_asan -m64 -O0 %s %p/Helpers/initialization-nobug-extra.cc\
-// RUN:   --std=c++11 -mllvm -asan-initialization-order -o %t && %t 2>&1
+// RUN:   --std=c++11 -fsanitize=init-order -o %t && %t 2>&1
 // RUN: %clangxx_asan -m64 -O1 %s %p/Helpers/initialization-nobug-extra.cc\
-// RUN:   --std=c++11 -mllvm -asan-initialization-order -o %t && %t 2>&1
+// RUN:   --std=c++11 -fsanitize=init-order -o %t && %t 2>&1
 // RUN: %clangxx_asan -m64 -O2 %s %p/Helpers/initialization-nobug-extra.cc\
-// RUN:   --std=c++11 -mllvm -asan-initialization-order -o %t && %t 2>&1
+// RUN:   --std=c++11 -fsanitize=init-order -o %t && %t 2>&1
 // RUN: %clangxx_asan -m64 -O3 %s %p/Helpers/initialization-nobug-extra.cc\
-// RUN:   --std=c++11 -mllvm -asan-initialization-order -o %t && %t 2>&1
+// RUN:   --std=c++11 -fsanitize=init-order -o %t && %t 2>&1
 // RUN: %clangxx_asan -m32 -O0 %s %p/Helpers/initialization-nobug-extra.cc\
-// RUN:   --std=c++11 -mllvm -asan-initialization-order -o %t && %t 2>&1
+// RUN:   --std=c++11 -fsanitize=init-order -o %t && %t 2>&1
 // RUN: %clangxx_asan -m32 -O0 %s %p/Helpers/initialization-nobug-extra.cc\
-// RUN:   --std=c++11 -mllvm -asan-initialization-order -o %t && %t 2>&1
+// RUN:   --std=c++11 -fsanitize=init-order -o %t && %t 2>&1
 // RUN: %clangxx_asan -m32 -O1 %s %p/Helpers/initialization-nobug-extra.cc\
-// RUN:   --std=c++11 -mllvm -asan-initialization-order -o %t && %t 2>&1
+// RUN:   --std=c++11 -fsanitize=init-order -o %t && %t 2>&1
 // RUN: %clangxx_asan -m32 -O2 %s %p/Helpers/initialization-nobug-extra.cc\
-// RUN:   --std=c++11 -mllvm -asan-initialization-order -o %t && %t 2>&1
+// RUN:   --std=c++11 -fsanitize=init-order -o %t && %t 2>&1
 // RUN: %clangxx_asan -m32 -O3 %s %p/Helpers/initialization-nobug-extra.cc\
-// RUN:   --std=c++11 -mllvm -asan-initialization-order -o %t && %t 2>&1
+// RUN:   --std=c++11 -fsanitize=init-order -o %t && %t 2>&1
 
 // Simple access:
 // Make sure that accessing a global in the same TU is safe
diff --git a/lib/asan/lit_tests/interface_symbols.c b/lib/asan/lit_tests/interface_symbols.c
index 03998d5..f3167f5 100644
--- a/lib/asan/lit_tests/interface_symbols.c
+++ b/lib/asan/lit_tests/interface_symbols.c
@@ -1,10 +1,16 @@
 // Check the presense of interface symbols in compiled file.
 
-// RUN: %clang -faddress-sanitizer -dead_strip -O2 %s -o %t.exe
-// RUN: nm %t.exe | egrep " [TW] " | sed "s/.* T //" | sed "s/.* W //" \
-// RUN:    | grep "__asan_" | sed "s/___asan_/__asan_/" > %t.symbols
+// RUN: %clang -fsanitize=address -dead_strip -O2 %s -o %t.exe
+// RUN: nm %t.exe | grep " T " | sed "s/.* T //" \
+// RUN:    | grep "__asan_" | sed "s/___asan_/__asan_/" \
+// RUN:    | grep -v "__asan_malloc_hook" \
+// RUN:    | grep -v "__asan_free_hook" \
+// RUN:    | grep -v "__asan_symbolize" \
+// RUN:    | grep -v "__asan_default_options" \
+// RUN:    | grep -v "__asan_on_error" > %t.symbols
 // RUN: cat %p/../../../include/sanitizer/asan_interface.h \
 // RUN:    | sed "s/\/\/.*//" | sed "s/typedef.*//" \
+// RUN:    | grep -v "OPTIONAL" \
 // RUN:    | grep "__asan_.*(" | sed "s/.* __asan_/__asan_/;s/(.*//" \
 // RUN:    > %t.interface
 // RUN: echo __asan_report_load1 >> %t.interface
diff --git a/lib/asan/lit_tests/lit.cfg b/lib/asan/lit_tests/lit.cfg
index 14ae142..7875281 100644
--- a/lib/asan/lit_tests/lit.cfg
+++ b/lib/asan/lit_tests/lit.cfg
@@ -56,10 +56,10 @@
             % compiler_rt_lit_cfg)
 lit.load_config(config, compiler_rt_lit_cfg)
 
-# Setup default compiler flags used with -faddress-sanitizer option.
+# Setup default compiler flags used with -fsanitize=address option.
 # FIXME: Review the set of required flags and check if it can be reduced.
 clang_asan_cxxflags = ("-ccc-cxx "
-                      + "-faddress-sanitizer "
+                      + "-fsanitize=address "
                       + "-mno-omit-leaf-frame-pointer "
                       + "-fno-omit-frame-pointer "
                       + "-fno-optimize-sibling-calls "
@@ -68,11 +68,10 @@
                                                 clang_asan_cxxflags + " ")) )
 
 # Setup path to external LLVM symbolizer to run AddressSanitizer output tests.
-llvm_obj_root = getattr(config, 'llvm_obj_root', None)
-if llvm_obj_root:
+llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
+if llvm_tools_dir:
   config.environment['LLVM_SYMBOLIZER_PATH'] = os.path.join(
-      config.llvm_obj_root, "projects", "compiler-rt", "utils",
-      "llvm-symbolizer", "llvm-symbolizer")
+      llvm_tools_dir, "llvm-symbolizer")
 
 # Setup path to symbolizer script.
 # FIXME: Instead we should copy this script to the build tree and point
diff --git a/lib/asan/lit_tests/sanity_check_pure_c.c b/lib/asan/lit_tests/sanity_check_pure_c.c
index b0bd1f0..2b5090b 100644
--- a/lib/asan/lit_tests/sanity_check_pure_c.c
+++ b/lib/asan/lit_tests/sanity_check_pure_c.c
@@ -1,9 +1,9 @@
 // Sanity checking a test in pure C.
-// RUN: %clang -g -faddress-sanitizer -O2 %s -o %t
+// RUN: %clang -g -fsanitize=address -O2 %s -o %t
 // RUN: %t 2>&1 | %symbolize | FileCheck %s
 
 // Sanity checking a test in pure C with -pie.
-// RUN: %clang -g -faddress-sanitizer -O2 %s -pie -o %t
+// RUN: %clang -g -fsanitize=address -O2 %s -pie -o %t
 // RUN: %t 2>&1 | %symbolize | FileCheck %s
 
 #include <stdlib.h>
diff --git a/lib/asan/lit_tests/sleep_before_dying.c b/lib/asan/lit_tests/sleep_before_dying.c
index 5fcb863..df9eba2 100644
--- a/lib/asan/lit_tests/sleep_before_dying.c
+++ b/lib/asan/lit_tests/sleep_before_dying.c
@@ -1,4 +1,4 @@
-// RUN: %clang -g -faddress-sanitizer -O2 %s -o %t
+// RUN: %clang -g -fsanitize=address -O2 %s -o %t
 // RUN: ASAN_OPTIONS="sleep_before_dying=1" %t 2>&1 | FileCheck %s
 
 #include <stdlib.h>
diff --git a/lib/asan/lit_tests/stack-use-after-return.cc b/lib/asan/lit_tests/stack-use-after-return.cc
index 201efa6..f8d8a1a 100644
--- a/lib/asan/lit_tests/stack-use-after-return.cc
+++ b/lib/asan/lit_tests/stack-use-after-return.cc
@@ -1,12 +1,20 @@
 // XFAIL: *
-// RUN: %clangxx_asan -m64 -O0 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
-// RUN: %clangxx_asan -m64 -O1 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
-// RUN: %clangxx_asan -m64 -O2 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
-// RUN: %clangxx_asan -m64 -O3 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
-// RUN: %clangxx_asan -m32 -O0 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
-// RUN: %clangxx_asan -m32 -O1 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
-// RUN: %clangxx_asan -m32 -O2 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
-// RUN: %clangxx_asan -m32 -O3 %s -o %t && %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -fsanitize=use-after-return -m64 -O0 %s -o %t && \
+// RUN:   %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -fsanitize=use-after-return -m64 -O1 %s -o %t && \
+// RUN:   %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -fsanitize=use-after-return -m64 -O2 %s -o %t && \
+// RUN:   %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -fsanitize=use-after-return -m64 -O3 %s -o %t && \
+// RUN:   %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -fsanitize=use-after-return -m32 -O0 %s -o %t && \
+// RUN:   %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -fsanitize=use-after-return -m32 -O1 %s -o %t && \
+// RUN:   %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -fsanitize=use-after-return -m32 -O2 %s -o %t && \
+// RUN:   %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -fsanitize=use-after-return -m32 -O3 %s -o %t && \
+// RUN:   %t 2>&1 | %symbolize | FileCheck %s
 
 #include <stdio.h>
 
@@ -26,9 +34,9 @@
 void Func2(char *x) {
   fprintf(stderr, "2: %p\n", x);
   *x = 1;
-  // CHECK: {{WRITE of size 1 .* thread T0}}
-  // CHECK: {{    #0.*Func2.*stack-use-after-return.cc:28}}
-  // CHECK: {{is located in frame <.*Func1.*> of T0's stack}}
+  // CHECK: WRITE of size 1 {{.*}} thread T0
+  // CHECK:     #0{{.*}}Func2{{.*}}stack-use-after-return.cc:[[@LINE-2]]
+  // CHECK: is located {{.*}} in frame <{{.*}}Func1{{.*}}> of T0's stack
 }
 
 int main(int argc, char **argv) {
diff --git a/lib/asan/lit_tests/strip_path_prefix.c b/lib/asan/lit_tests/strip_path_prefix.c
index 01c3b70..ef7bf98 100644
--- a/lib/asan/lit_tests/strip_path_prefix.c
+++ b/lib/asan/lit_tests/strip_path_prefix.c
@@ -1,4 +1,4 @@
-// RUN: %clang -g -faddress-sanitizer -O2 %s -o %t
+// RUN: %clang -g -fsanitize=address -O2 %s -o %t
 // RUN: ASAN_OPTIONS="strip_path_prefix='/'" %t 2>&1 | FileCheck %s
 
 #include <stdlib.h>
diff --git a/lib/asan/lit_tests/strncpy-overflow.cc b/lib/asan/lit_tests/strncpy-overflow.cc
index d3c66da..9381f13 100644
--- a/lib/asan/lit_tests/strncpy-overflow.cc
+++ b/lib/asan/lit_tests/strncpy-overflow.cc
@@ -24,7 +24,7 @@
   strncpy(short_buffer, hello, 10);  // BOOM
   // CHECK: {{WRITE of size 1 at 0x.* thread T0}}
   // CHECK-Linux: {{    #0 0x.* in .*strncpy}}
-  // CHECK-Darwin: {{    #0 0x.* in wrap_strncpy}}
+  // CHECK-Darwin: {{    #0 0x.* in _?wrap_strncpy}}
   // CHECK: {{    #1 0x.* in _?main .*strncpy-overflow.cc:24}}
   // CHECK: {{0x.* is located 0 bytes to the right of 9-byte region}}
   // CHECK: {{allocated by thread T0 here:}}
diff --git a/lib/asan/lit_tests/use-after-free.cc b/lib/asan/lit_tests/use-after-free.cc
index 0ab8e09..24d5a2a 100644
--- a/lib/asan/lit_tests/use-after-free.cc
+++ b/lib/asan/lit_tests/use-after-free.cc
@@ -30,10 +30,11 @@
   // CHECK-Linux: {{    #0 0x.* in .*free}}
   // CHECK-Linux: {{    #1 0x.* in main .*use-after-free.cc:21}}
 
-  // CHECK-Darwin: {{    #0 0x.* in .*mz_free.*}}
+  // CHECK-Darwin: {{    #0 0x.* in .*free_common.*}}
+  // CHECK-Darwin: {{    #1 0x.* in .*mz_free.*}}
   // We override free() on Darwin, thus no malloc_zone_free
-  // CHECK-Darwin: {{    #1 0x.* in wrap_free}}
-  // CHECK-Darwin: {{    #2 0x.* in _?main .*use-after-free.cc:21}}
+  // CHECK-Darwin: {{    #2 0x.* in _?wrap_free}}
+  // CHECK-Darwin: {{    #3 0x.* in _?main .*use-after-free.cc:21}}
 
   // CHECK: {{previously allocated by thread T0 here:}}
 
diff --git a/lib/asan/lit_tests/use-after-scope-inlined.cc b/lib/asan/lit_tests/use-after-scope-inlined.cc
new file mode 100644
index 0000000..c192bc2
--- /dev/null
+++ b/lib/asan/lit_tests/use-after-scope-inlined.cc
@@ -0,0 +1,29 @@
+// Test with "-O2" only to make sure inlining (leading to use-after-scope)
+// happens. "always_inline" is not enough, as Clang doesn't emit
+// llvm.lifetime intrinsics at -O0.
+//
+// RUN: %clangxx_asan -m64 -O2 -fsanitize=use-after-scope %s -o %t && \
+// RUN:     %t 2>&1 | %symbolize | FileCheck %s
+// RUN: %clangxx_asan -m32 -O2 -fsanitize=use-after-scope %s -o %t && \
+// RUN:     %t 2>&1 | %symbolize | FileCheck %s
+
+int *arr;
+
+__attribute__((always_inline))
+void inlined(int arg) {
+  int x[5];
+  for (int i = 0; i < arg; i++) x[i] = i;
+  arr = x;
+}
+
+int main(int argc, char *argv[]) {
+  inlined(argc);
+  return arr[argc - 1];  // BOOM
+  // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
+  // CHECK: READ of size 4 at 0x{{.*}} thread T0
+  // CHECK:   #0 0x{{.*}} in {{_?}}main
+  // CHECK:      {{.*}}use-after-scope-inlined.cc:[[@LINE-4]]
+  // CHECK: Address 0x{{.*}} is located at offset
+  // CHECK:      [[OFFSET:[^ ]*]] in frame <main> of T0's stack:
+  // CHECK:   {{\[}}[[OFFSET]], {{.*}}) 'x.i'
+}
diff --git a/lib/asan/tests/CMakeLists.txt b/lib/asan/tests/CMakeLists.txt
index af69579..44f188c 100644
--- a/lib/asan/tests/CMakeLists.txt
+++ b/lib/asan/tests/CMakeLists.txt
@@ -66,7 +66,7 @@
 set(ASAN_UNITTEST_INSTRUMENTED_CFLAGS
   ${ASAN_UNITTEST_COMMON_CFLAGS}
   ${ASAN_GTEST_INCLUDE_CFLAGS}
-  -faddress-sanitizer
+  -fsanitize=address
   -mllvm "-asan-blacklist=${ASAN_BLACKLIST_FILE}"
   -mllvm -asan-stack=1
   -mllvm -asan-globals=1
@@ -105,7 +105,6 @@
 
 set(ASAN_NOINST_TEST_SOURCES
   asan_noinst_test.cc
-  asan_break_optimization.cc
   asan_test_main.cc
 )
 
@@ -147,8 +146,7 @@
   add_asan_compile_command(asan_benchmarks_test.cc "")
   add_custom_target(AsanBenchmarks)
   set_target_properties(AsanBenchmarks PROPERTIES FOLDER "Asan benchmarks")
-  add_asan_test(AsanBenchmarks AsanBenchmark asan_break_optimization.cc
-                                             asan_benchmarks_test.cc.asan.o)
+  add_asan_test(AsanBenchmarks AsanBenchmark asan_benchmarks_test.cc.asan.o)
 endif()
 
 # Main AddressSanitizer unit tests.
diff --git a/lib/asan/tests/asan_break_optimization.cc b/lib/asan/tests/asan_break_optimization.cc
deleted file mode 100644
index 022a9f8..0000000
--- a/lib/asan/tests/asan_break_optimization.cc
+++ /dev/null
@@ -1,19 +0,0 @@
-//===-- asan_break_optimization.cc ----------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of AddressSanitizer, an address sanity checker.
-//
-//===----------------------------------------------------------------------===//
-
-#include "asan_test_utils.h"
-// Have this function in a separate file to avoid inlining.
-// (Yes, we know about cross-file inlining, but let's assume we don't use it).
-extern "C" void break_optimization(void *x) {
-  (void)x;
-}
diff --git a/lib/asan/tests/asan_noinst_test.cc b/lib/asan/tests/asan_noinst_test.cc
index 2fb5eb7..7f9938a 100644
--- a/lib/asan/tests/asan_noinst_test.cc
+++ b/lib/asan/tests/asan_noinst_test.cc
@@ -118,7 +118,7 @@
 }
 
 static uptr pc_array[] = {
-#if __WORDSIZE == 64
+#if SANITIZER_WORDSIZE == 64
   0x7effbf756068ULL,
   0x7effbf75e5abULL,
   0x7effc0625b7cULL,
@@ -164,7 +164,7 @@
   0x7effbcc3e726ULL,
   0x7effbcc40852ULL,
   0x7effb681ec4dULL,
-#endif  // __WORDSIZE
+#endif  // SANITIZER_WORDSIZE
   0xB0B5E768,
   0x7B682EC1,
   0x367F9918,
@@ -296,8 +296,8 @@
   size_t mmaped1 = __asan_get_heap_size();
   for (int i = 0; i < n_threads; i++) {
     pthread_t t;
-    pthread_create(&t, NULL, ThreadedQuarantineTestWorker, 0);
-    pthread_join(t, 0);
+    PTHREAD_CREATE(&t, NULL, ThreadedQuarantineTestWorker, 0);
+    PTHREAD_JOIN(t, 0);
     size_t mmaped2 = __asan_get_heap_size();
     EXPECT_LT(mmaped2 - mmaped1, 320U * (1 << 20));
   }
@@ -325,10 +325,10 @@
   const int kNumThreads = 4;
   pthread_t t[kNumThreads];
   for (int i = 0; i < kNumThreads; i++) {
-    pthread_create(&t[i], 0, ThreadedOneSizeMallocStress, 0);
+    PTHREAD_CREATE(&t[i], 0, ThreadedOneSizeMallocStress, 0);
   }
   for (int i = 0; i < kNumThreads; i++) {
-    pthread_join(t[i], 0);
+    PTHREAD_JOIN(t[i], 0);
   }
 }
 
@@ -336,11 +336,11 @@
   typedef void*(*memset_p)(void*, int, size_t);
   // Prevent inlining of memset().
   volatile memset_p libc_memset = (memset_p)memset;
-  EXPECT_DEATH(libc_memset((void*)(kLowShadowBeg + kPageSize), 0, 100),
+  EXPECT_DEATH(libc_memset((void*)(kLowShadowBeg + 200), 0, 100),
                "unknown-crash.*low shadow");
-  EXPECT_DEATH(libc_memset((void*)(kShadowGapBeg + kPageSize), 0, 100),
+  EXPECT_DEATH(libc_memset((void*)(kShadowGapBeg + 200), 0, 100),
                "unknown-crash.*shadow gap");
-  EXPECT_DEATH(libc_memset((void*)(kHighShadowBeg + kPageSize), 0, 100),
+  EXPECT_DEATH(libc_memset((void*)(kHighShadowBeg + 200), 0, 100),
                "unknown-crash.*high shadow");
 }
 
@@ -464,7 +464,7 @@
   // chunks to fulfill future requests. So, future requests will decrease
   // the number of free bytes. Do this only on systems where there
   // is enough memory for such assumptions.
-  if (__WORDSIZE == 64 && !ASAN_LOW_MEMORY) {
+  if (SANITIZER_WORDSIZE == 64 && !ASAN_LOW_MEMORY) {
     static const size_t kNumOfChunks = 100;
     static const size_t kChunkSize = 100;
     char *chunks[kNumOfChunks];
@@ -486,7 +486,8 @@
 
 static const size_t kManyThreadsMallocSizes[] = {5, 1UL<<10, 1UL<<20, 357};
 static const size_t kManyThreadsIterations = 250;
-static const size_t kManyThreadsNumThreads = (__WORDSIZE == 32) ? 40 : 200;
+static const size_t kManyThreadsNumThreads =
+  (SANITIZER_WORDSIZE == 32) ? 40 : 200;
 
 void *ManyThreadsWithStatsWorker(void *arg) {
   (void)arg;
@@ -503,11 +504,11 @@
   pthread_t threads[kManyThreadsNumThreads];
   before_test = __asan_get_current_allocated_bytes();
   for (i = 0; i < kManyThreadsNumThreads; i++) {
-    pthread_create(&threads[i], 0,
+    PTHREAD_CREATE(&threads[i], 0,
                    (void* (*)(void *x))ManyThreadsWithStatsWorker, (void*)i);
   }
   for (i = 0; i < kManyThreadsNumThreads; i++) {
-    pthread_join(threads[i], 0);
+    PTHREAD_JOIN(threads[i], 0);
   }
   after_test = __asan_get_current_allocated_bytes();
   // ASan stats also reflect memory usage of internal ASan RTL structs,
@@ -693,7 +694,7 @@
   std::vector<char *> pointers;
   std::vector<size_t> sizes;
   const size_t kNumMallocs =
-      (__WORDSIZE <= 32 || ASAN_LOW_MEMORY) ? 1 << 10 : 1 << 14;
+      (SANITIZER_WORDSIZE <= 32 || ASAN_LOW_MEMORY) ? 1 << 10 : 1 << 14;
   for (size_t i = 0; i < kNumMallocs; i++) {
     size_t size = i * 100 + 1;
     pointers.push_back((char*)malloc(size));
diff --git a/lib/asan/tests/asan_test.cc b/lib/asan/tests/asan_test.cc
index 5810f8f..7bb6e29 100644
--- a/lib/asan/tests/asan_test.cc
+++ b/lib/asan/tests/asan_test.cc
@@ -20,6 +20,10 @@
 #include <setjmp.h>
 #include <assert.h>
 
+#ifdef __linux__
+# include <sys/prctl.h>
+#endif
+
 #if defined(__i386__) || defined(__x86_64__)
 #include <emmintrin.h>
 #endif
@@ -129,6 +133,8 @@
 TEST(AddressSanitizer, HasFeatureAddressSanitizerTest) {
 #if defined(__has_feature) && __has_feature(address_sanitizer)
   bool asan = 1;
+#elif defined(__SANITIZE_ADDRESS__)
+  bool asan = 1;
 #else
   bool asan = 0;
 #endif
@@ -208,8 +214,8 @@
 void TSDDestructor(void *tsd) {
   // Spawning a thread will check that the current thread id is not -1.
   pthread_t th;
-  pthread_create(&th, NULL, TSDWorker, NULL);
-  pthread_join(th, NULL);
+  PTHREAD_CREATE(&th, NULL, TSDWorker, NULL);
+  PTHREAD_JOIN(th, NULL);
 }
 
 // This tests triggers the thread-specific data destruction fiasco which occurs
@@ -223,8 +229,8 @@
   pthread_t th;
   pthread_key_t test_key;
   pthread_key_create(&test_key, TSDDestructor);
-  pthread_create(&th, NULL, TSDWorker, &test_key);
-  pthread_join(th, NULL);
+  PTHREAD_CREATE(&th, NULL, TSDWorker, &test_key);
+  PTHREAD_JOIN(th, NULL);
   pthread_key_delete(test_key);
 }
 
@@ -344,7 +350,7 @@
 }
 
 TEST(AddressSanitizer, OutOfMemoryTest) {
-  size_t size = __WORDSIZE == 64 ? (size_t)(1ULL << 48) : (0xf0000000);
+  size_t size = SANITIZER_WORDSIZE == 64 ? (size_t)(1ULL << 48) : (0xf0000000);
   EXPECT_EQ(0, realloc(0, size));
   EXPECT_EQ(0, realloc(0, ~Ident(0)));
   EXPECT_EQ(0, malloc(size));
@@ -448,9 +454,9 @@
   // 32-bit Mac 10.7 gives even less (< 1G).
   // (the libSystem malloc() allows allocating up to 2300 megabytes without
   // ASan).
-  size_t n_megs = __WORDSIZE == 32 ? 500 : 4100;
+  size_t n_megs = SANITIZER_WORDSIZE == 32 ? 500 : 4100;
 #else
-  size_t n_megs = __WORDSIZE == 32 ? 2600 : 4100;
+  size_t n_megs = SANITIZER_WORDSIZE == 32 ? 2600 : 4100;
 #endif
   TestLargeMalloc(n_megs << 20);
 }
@@ -461,11 +467,11 @@
   const int kNumIterations = (ASAN_LOW_MEMORY) ? 10000 : 100000;
   pthread_t t[kNumThreads];
   for (int i = 0; i < kNumThreads; i++) {
-    pthread_create(&t[i], 0, (void* (*)(void *x))MallocStress,
+    PTHREAD_CREATE(&t[i], 0, (void* (*)(void *x))MallocStress,
         (void*)kNumIterations);
   }
   for (int i = 0; i < kNumThreads; i++) {
-    pthread_join(t[i], 0);
+    PTHREAD_JOIN(t[i], 0);
   }
 }
 
@@ -479,13 +485,14 @@
 }
 
 TEST(AddressSanitizer, ManyThreadsTest) {
-  const size_t kNumThreads = __WORDSIZE == 32 ? 30 : 1000;
+  const size_t kNumThreads =
+      (SANITIZER_WORDSIZE == 32 || ASAN_AVOID_EXPENSIVE_TESTS) ? 30 : 1000;
   pthread_t t[kNumThreads];
   for (size_t i = 0; i < kNumThreads; i++) {
-    pthread_create(&t[i], 0, (void* (*)(void *x))ManyThreadsWorker, (void*)i);
+    PTHREAD_CREATE(&t[i], 0, ManyThreadsWorker, (void*)i);
   }
   for (size_t i = 0; i < kNumThreads; i++) {
-    pthread_join(t[i], 0);
+    PTHREAD_JOIN(t[i], 0);
   }
 }
 
@@ -637,6 +644,17 @@
   longjmp(buf, 1);
 }
 
+NOINLINE void BuiltinLongJmpFunc1(jmp_buf buf) {
+  // create three red zones for these two stack objects.
+  int a;
+  int b;
+
+  int *A = Ident(&a);
+  int *B = Ident(&b);
+  *A = *B;
+  __builtin_longjmp((void**)buf, 1);
+}
+
 NOINLINE void UnderscopeLongJmpFunc1(jmp_buf buf) {
   // create three red zones for these two stack objects.
   int a;
@@ -677,6 +695,17 @@
   }
 }
 
+#if not defined(__ANDROID__)
+TEST(AddressSanitizer, BuiltinLongJmpTest) {
+  static jmp_buf buf;
+  if (!__builtin_setjmp((void**)buf)) {
+    BuiltinLongJmpFunc1(buf);
+  } else {
+    TouchStackFunc();
+  }
+}
+#endif  // not defined(__ANDROID__)
+
 TEST(AddressSanitizer, UnderscopeLongJmpTest) {
   static jmp_buf buf;
   if (!_setjmp(buf)) {
@@ -710,7 +739,7 @@
 TEST(AddressSanitizer, CxxExceptionTest) {
   if (ASAN_UAR) return;
   // TODO(kcc): this test crashes on 32-bit for some reason...
-  if (__WORDSIZE == 32) return;
+  if (SANITIZER_WORDSIZE == 32) return;
   try {
     ThrowFunc();
   } catch(...) {}
@@ -737,10 +766,10 @@
 
 TEST(AddressSanitizer, ThreadStackReuseTest) {
   pthread_t t;
-  pthread_create(&t, 0, ThreadStackReuseFunc1, 0);
-  pthread_join(t, 0);
-  pthread_create(&t, 0, ThreadStackReuseFunc2, 0);
-  pthread_join(t, 0);
+  PTHREAD_CREATE(&t, 0, ThreadStackReuseFunc1, 0);
+  PTHREAD_JOIN(t, 0);
+  PTHREAD_CREATE(&t, 0, ThreadStackReuseFunc2, 0);
+  PTHREAD_JOIN(t, 0);
 }
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -952,6 +981,7 @@
   size_t length = Ident(10);
   char *heap_string = Ident((char*)malloc(length + 1));
   char stack_string[10 + 1];
+  break_optimization(&stack_string);
   for (size_t i = 0; i < length; i++) {
     heap_string[i] = 'a';
     stack_string[i] = 'b';
@@ -1580,7 +1610,7 @@
 TEST(AddressSanitizer, DISABLED_LargeFunctionSymbolizeTest) {
   int failing_line = LargeFunction(false);
   char expected_warning[128];
-  sprintf(expected_warning, "LargeFunction.*asan_test.cc:%d", failing_line);
+  sprintf(expected_warning, "LargeFunction.*asan_test.*:%d", failing_line);
   EXPECT_DEATH(LargeFunction(true), expected_warning);
 }
 
@@ -1593,19 +1623,28 @@
                "malloc_fff.*malloc_eee.*malloc_ddd");
 }
 
+static void TryToSetThreadName(const char *name) {
+#ifdef __linux__
+  prctl(PR_SET_NAME, (unsigned long)name, 0, 0, 0);
+#endif
+}
+
 void *ThreadedTestAlloc(void *a) {
+  TryToSetThreadName("AllocThr");
   int **p = (int**)a;
   *p = new int;
   return 0;
 }
 
 void *ThreadedTestFree(void *a) {
+  TryToSetThreadName("FreeThr");
   int **p = (int**)a;
   delete *p;
   return 0;
 }
 
 void *ThreadedTestUse(void *a) {
+  TryToSetThreadName("UseThr");
   int **p = (int**)a;
   **p = 1;
   return 0;
@@ -1614,12 +1653,12 @@
 void ThreadedTestSpawn() {
   pthread_t t;
   int *x;
-  pthread_create(&t, 0, ThreadedTestAlloc, &x);
-  pthread_join(t, 0);
-  pthread_create(&t, 0, ThreadedTestFree, &x);
-  pthread_join(t, 0);
-  pthread_create(&t, 0, ThreadedTestUse, &x);
-  pthread_join(t, 0);
+  PTHREAD_CREATE(&t, 0, ThreadedTestAlloc, &x);
+  PTHREAD_JOIN(t, 0);
+  PTHREAD_CREATE(&t, 0, ThreadedTestFree, &x);
+  PTHREAD_JOIN(t, 0);
+  PTHREAD_CREATE(&t, 0, ThreadedTestUse, &x);
+  PTHREAD_JOIN(t, 0);
 }
 
 TEST(AddressSanitizer, ThreadedTest) {
@@ -1630,9 +1669,24 @@
                ".*Thread T.*created");
 }
 
+#ifdef __linux__
+TEST(AddressSanitizer, ThreadNamesTest) {
+  // ThreadedTestSpawn();
+  EXPECT_DEATH(ThreadedTestSpawn(),
+               ASAN_PCRE_DOTALL
+               "WRITE .*thread T3 .UseThr."
+               ".*freed by thread T2 .FreeThr. here:"
+               ".*previously allocated by thread T1 .AllocThr. here:"
+               ".*Thread T3 .UseThr. created by T0 here:"
+               ".*Thread T2 .FreeThr. created by T0 here:"
+               ".*Thread T1 .AllocThr. created by T0 here:"
+               "");
+}
+#endif
+
 #if ASAN_NEEDS_SEGV
 TEST(AddressSanitizer, ShadowGapTest) {
-#if __WORDSIZE == 32
+#if SANITIZER_WORDSIZE == 32
   char *addr = (char*)0x22000000;
 #else
   char *addr = (char*)0x0000100000080000;
@@ -1724,7 +1778,7 @@
   static char zoo[10];
   const char *p = Ident(zoo);
   // The file name should be present in the report.
-  EXPECT_DEATH(Ident(p[15]), "zoo.*asan_test.cc");
+  EXPECT_DEATH(Ident(p[15]), "zoo.*asan_test.");
 }
 
 int *ReturnsPointerToALocalObject() {
@@ -1777,10 +1831,10 @@
   const int kNumThreads = 20;
   pthread_t t[kNumThreads];
   for (int i = 0; i < kNumThreads; i++) {
-    pthread_create(&t[i], 0, (void* (*)(void *x))LotsOfStackReuse, 0);
+    PTHREAD_CREATE(&t[i], 0, (void* (*)(void *x))LotsOfStackReuse, 0);
   }
   for (int i = 0; i < kNumThreads; i++) {
-    pthread_join(t[i], 0);
+    PTHREAD_JOIN(t[i], 0);
   }
 }
 
@@ -1792,8 +1846,8 @@
 TEST(AddressSanitizer, PthreadExitTest) {
   pthread_t t;
   for (int i = 0; i < 1000; i++) {
-    pthread_create(&t, 0, PthreadExit, 0);
-    pthread_join(t, 0);
+    PTHREAD_CREATE(&t, 0, PthreadExit, 0);
+    PTHREAD_JOIN(t, 0);
   }
 }
 
@@ -1862,8 +1916,8 @@
 
 TEST(AddressSanitizer, DISABLED_DemoThreadStackTest) {
   pthread_t t;
-  pthread_create(&t, 0, SimpleBugOnSTack, 0);
-  pthread_join(t, 0);
+  PTHREAD_CREATE(&t, 0, SimpleBugOnSTack, 0);
+  PTHREAD_JOIN(t, 0);
 }
 
 TEST(AddressSanitizer, DISABLED_DemoUAFLowIn) {
@@ -1897,7 +1951,7 @@
 }
 
 TEST(AddressSanitizer, DISABLED_DemoOOM) {
-  size_t size = __WORDSIZE == 64 ? (size_t)(1ULL << 40) : (0xf0000000);
+  size_t size = SANITIZER_WORDSIZE == 64 ? (size_t)(1ULL << 40) : (0xf0000000);
   printf("%p\n", malloc(size));
 }
 
@@ -1953,8 +2007,8 @@
 
 void CFAllocator_DoubleFreeOnPthread() {
   pthread_t child;
-  pthread_create(&child, NULL, CFAllocatorDefaultDoubleFree, NULL);
-  pthread_join(child, NULL);  // Shouldn't be reached.
+  PTHREAD_CREATE(&child, NULL, CFAllocatorDefaultDoubleFree, NULL);
+  PTHREAD_JOIN(child, NULL);  // Shouldn't be reached.
 }
 
 TEST(AddressSanitizerMac, CFAllocatorDefaultDoubleFree_ChildPhread) {
@@ -1979,10 +2033,10 @@
 
 void CFAllocator_PassMemoryToAnotherThread() {
   pthread_t th1, th2;
-  pthread_create(&th1, NULL, CFAllocatorAllocateToGlob, NULL);
-  pthread_join(th1, NULL);
-  pthread_create(&th2, NULL, CFAllocatorDeallocateFromGlob, NULL);
-  pthread_join(th2, NULL);
+  PTHREAD_CREATE(&th1, NULL, CFAllocatorAllocateToGlob, NULL);
+  PTHREAD_JOIN(th1, NULL);
+  PTHREAD_CREATE(&th2, NULL, CFAllocatorDeallocateFromGlob, NULL);
+  PTHREAD_JOIN(th2, NULL);
 }
 
 TEST(AddressSanitizerMac, CFAllocator_PassMemoryToAnotherThread) {
@@ -2098,13 +2152,13 @@
   for (iter = 0; iter < kNumIterations; iter++) {
     pthread_t workers[kNumWorkers], forker;
     for (i = 0; i < kNumWorkers; i++) {
-      pthread_create(&workers[i], 0, MallocIntrospectionLockWorker, 0);
+      PTHREAD_CREATE(&workers[i], 0, MallocIntrospectionLockWorker, 0);
     }
-    pthread_create(&forker, 0, MallocIntrospectionLockForker, 0);
+    PTHREAD_CREATE(&forker, 0, MallocIntrospectionLockForker, 0);
     for (i = 0; i < kNumWorkers; i++) {
-      pthread_join(workers[i], 0);
+      PTHREAD_JOIN(workers[i], 0);
     }
-    pthread_join(forker, 0);
+    PTHREAD_JOIN(forker, 0);
   }
 }
 
@@ -2120,8 +2174,8 @@
   pthread_t th;
   pthread_key_t test_key;
   pthread_key_create(&test_key, CallFreeOnWorkqueue);
-  pthread_create(&th, NULL, TSDAllocWorker, &test_key);
-  pthread_join(th, NULL);
+  PTHREAD_CREATE(&th, NULL, TSDAllocWorker, &test_key);
+  PTHREAD_JOIN(th, NULL);
   pthread_key_delete(test_key);
 }
 
diff --git a/lib/asan/tests/asan_test_config.h b/lib/asan/tests/asan_test_config.h
index 5e7501f..1d28e99 100644
--- a/lib/asan/tests/asan_test_config.h
+++ b/lib/asan/tests/asan_test_config.h
@@ -21,7 +21,11 @@
 #include <string>
 #include <map>
 
-#include "gtest/gtest.h"
+#if ASAN_USE_DEJAGNU_GTEST
+# include "dejagnu-gtest.h"
+#else
+# include "gtest/gtest.h"
+#endif
 
 using std::string;
 using std::vector;
@@ -44,7 +48,11 @@
 #endif
 
 #ifndef ASAN_LOW_MEMORY
-#define ASAN_LOW_MEMORY 0
+# define ASAN_LOW_MEMORY 0
+#endif
+
+#ifndef ASAN_AVOID_EXPENSIVE_TESTS
+# define ASAN_AVOID_EXPENSIVE_TESTS 0
 #endif
 
 #define ASAN_PCRE_DOTALL ""
diff --git a/lib/asan/tests/asan_test_utils.h b/lib/asan/tests/asan_test_utils.h
index ffcaaec..f810438 100644
--- a/lib/asan/tests/asan_test_utils.h
+++ b/lib/asan/tests/asan_test_utils.h
@@ -40,23 +40,23 @@
 #define __has_feature(x) 0
 #endif
 
-#if __has_feature(address_sanitizer)
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
 # define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
     __attribute__((no_address_safety_analysis))
 #else
 # define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
 #endif
 
-#ifndef __WORDSIZE
 #if __LP64__ || defined(_WIN64)
-#define __WORDSIZE 64
+#  define SANITIZER_WORDSIZE 64
 #else
-#define __WORDSIZE 32
-#endif
+#  define SANITIZER_WORDSIZE 32
 #endif
 
-// Make the compiler think that something is going on there.
-extern "C" void break_optimization(void *arg);
+// Make the compiler think that something is going on there.
+inline void break_optimization(void *arg) {
+  __asm__ __volatile__("" : : "r" (arg) : "memory");
+}
 
 // This function returns its parameter but in such a way that compiler
 // can not prove it.
@@ -68,4 +68,8 @@
   return ret;
 }
 
+// Check that pthread_create/pthread_join return success.
+#define PTHREAD_CREATE(a, b, c, d) ASSERT_EQ(0, pthread_create(a, b, c, d))
+#define PTHREAD_JOIN(a, b) ASSERT_EQ(0, pthread_join(a, b))
+
 #endif  // ASAN_TEST_UTILS_H
diff --git a/lib/interception/mach_override/mach_override.c b/lib/interception/mach_override/mach_override.c
index ba5653c..7511a7b 100644
--- a/lib/interception/mach_override/mach_override.c
+++ b/lib/interception/mach_override/mach_override.c
@@ -725,6 +725,8 @@
         { 0x2, {0xFF, 0x00}, {0x89, 0x00} },                               // mov r/m32,r32 or r/m16,r16
         { 0x3, {0xFF, 0xFF, 0xFF}, {0x49, 0x89, 0xF8} },                   // mov %rdi,%r8
         { 0x4, {0xFF, 0xFF, 0xFF, 0xFF}, {0x40, 0x0F, 0xBE, 0xCE} },       // movsbl %sil,%ecx
+        { 0x7, {0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00},
+               {0x48, 0x8D, 0x05, 0x00, 0x00, 0x00, 0x00} },  // lea $imm(%rip),%rax
         { 0x3, {0xFF, 0xFF, 0xFF}, {0x0F, 0xBE, 0xCE} },  // movsbl, %dh, %ecx
         { 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} },  // pushq $imm(%rdi)
         { 0x2, {0xFF, 0xFF}, {0xDB, 0xE3} }, // fninit
diff --git a/lib/sanitizer_common/sanitizer_allocator.cc b/lib/sanitizer_common/sanitizer_allocator.cc
index 985f91a..b13a7c6 100644
--- a/lib/sanitizer_common/sanitizer_allocator.cc
+++ b/lib/sanitizer_common/sanitizer_allocator.cc
@@ -49,13 +49,6 @@
   LIBC_FREE(addr);
 }
 
-void *InternalAllocBlock(void *p) {
-  CHECK_NE(p, (void*)0);
-  u64 *pp = (u64*)((uptr)p & ~0x7);
-  for (; pp[0] != kBlockMagic; pp--) {}
-  return pp + 1;
-}
-
 // LowLevelAllocator
 static LowLevelAllocateCallback low_level_alloc_callback;
 
@@ -63,7 +56,7 @@
   // Align allocation size.
   size = RoundUpTo(size, 8);
   if (allocated_end_ - allocated_current_ < (sptr)size) {
-    uptr size_to_allocate = Max(size, kPageSize);
+    uptr size_to_allocate = Max(size, GetPageSizeCached());
     allocated_current_ =
         (char*)MmapOrDie(size_to_allocate, __FUNCTION__);
     allocated_end_ = allocated_current_ + size_to_allocate;
diff --git a/lib/sanitizer_common/sanitizer_allocator64.h b/lib/sanitizer_common/sanitizer_allocator.h
similarity index 61%
rename from lib/sanitizer_common/sanitizer_allocator64.h
rename to lib/sanitizer_common/sanitizer_allocator.h
index 71196b1..6325088 100644
--- a/lib/sanitizer_common/sanitizer_allocator64.h
+++ b/lib/sanitizer_common/sanitizer_allocator.h
@@ -1,4 +1,4 @@
-//===-- sanitizer_allocator64.h ---------------------------------*- C++ -*-===//
+//===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,22 +6,15 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-// Specialized allocator which works only in 64-bit address space.
-// To be used by ThreadSanitizer, MemorySanitizer and possibly other tools.
-// The main feature of this allocator is that the header is located far away
-// from the user memory region, so that the tool does not use extra shadow
-// for the header.
 //
-// Status: not yet ready.
+// Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
+//
 //===----------------------------------------------------------------------===//
+
 #ifndef SANITIZER_ALLOCATOR_H
 #define SANITIZER_ALLOCATOR_H
 
 #include "sanitizer_internal_defs.h"
-#if __WORDSIZE != 64
-# error "sanitizer_allocator64.h can only be used on 64-bit platforms"
-#endif
-
 #include "sanitizer_common.h"
 #include "sanitizer_libc.h"
 #include "sanitizer_list.h"
@@ -30,7 +23,10 @@
 namespace __sanitizer {
 
 // Maps size class id to size and back.
-class DefaultSizeClassMap {
+template <uptr l0, uptr l1, uptr l2, uptr l3, uptr l4, uptr l5,
+          uptr s0, uptr s1, uptr s2, uptr s3, uptr s4,
+          uptr c0, uptr c1, uptr c2, uptr c3, uptr c4>
+class SplineSizeClassMap {
  private:
   // Here we use a spline composed of 5 polynomials of oder 1.
   // The first size class is l0, then the classes go with step s0
@@ -38,38 +34,20 @@
   // Steps should be powers of two for cheap division.
   // The size of the last size class should be a power of two.
   // There should be at most 256 size classes.
-  static const uptr l0 = 1 << 4;
-  static const uptr l1 = 1 << 9;
-  static const uptr l2 = 1 << 12;
-  static const uptr l3 = 1 << 15;
-  static const uptr l4 = 1 << 18;
-  static const uptr l5 = 1 << 21;
-
-  static const uptr s0 = 1 << 4;
-  static const uptr s1 = 1 << 6;
-  static const uptr s2 = 1 << 9;
-  static const uptr s3 = 1 << 12;
-  static const uptr s4 = 1 << 15;
-
   static const uptr u0 = 0  + (l1 - l0) / s0;
   static const uptr u1 = u0 + (l2 - l1) / s1;
   static const uptr u2 = u1 + (l3 - l2) / s2;
   static const uptr u3 = u2 + (l4 - l3) / s3;
   static const uptr u4 = u3 + (l5 - l4) / s4;
 
-  // Max cached in local cache blocks.
-  static const uptr c0 = 256;
-  static const uptr c1 = 64;
-  static const uptr c2 = 16;
-  static const uptr c3 = 4;
-  static const uptr c4 = 1;
-
  public:
+  // The number of size classes should be a power of two for fast division.
   static const uptr kNumClasses = u4 + 1;
   static const uptr kMaxSize = l5;
   static const uptr kMinSize = l0;
 
   COMPILER_CHECK(kNumClasses <= 256);
+  COMPILER_CHECK((kNumClasses & (kNumClasses - 1)) == 0);
   COMPILER_CHECK((kMaxSize & (kMaxSize - 1)) == 0);
 
   static uptr Size(uptr class_id) {
@@ -99,13 +77,52 @@
   }
 };
 
+class DefaultSizeClassMap: public SplineSizeClassMap<
+  /* l: */1 << 4, 1 << 9,  1 << 12, 1 << 15, 1 << 18, 1 << 21,
+  /* s: */1 << 4, 1 << 6,  1 << 9,  1 << 12, 1 << 15,
+  /* c: */256,    64,      16,      4,       1> {
+ private:
+  COMPILER_CHECK(kNumClasses == 256);
+};
+
+class CompactSizeClassMap: public SplineSizeClassMap<
+  /* l: */1 << 3, 1 << 4,  1 << 7, 1 << 8, 1 << 12, 1 << 15,
+  /* s: */1 << 3, 1 << 4,  1 << 7, 1 << 8, 1 << 12,
+  /* c: */256,    64,      16,      4,       1> {
+ private:
+  COMPILER_CHECK(kNumClasses <= 32);
+};
+
 struct AllocatorListNode {
   AllocatorListNode *next;
 };
 
 typedef IntrusiveList<AllocatorListNode> AllocatorFreeList;
 
+// Move at most max_count chunks from allocate_from to allocate_to.
+// Ideally this would be a method of AllocatorFreeList, but we can't
+// inherit it from IntrusiveList as the ancient gcc complains about non-PODness.
+static inline void BulkMove(uptr max_count,
+                            AllocatorFreeList *allocate_from,
+                            AllocatorFreeList *allocate_to) {
+  CHECK(!allocate_from->empty());
+  CHECK(allocate_to->empty());
+  if (allocate_from->size() <= max_count) {
+    allocate_to->append_front(allocate_from);
+    CHECK(allocate_from->empty());
+  } else {
+    for (uptr i = 0; i < max_count; i++) {
+      AllocatorListNode *node = allocate_from->front();
+      allocate_from->pop_front();
+      allocate_to->push_front(node);
+    }
+    CHECK(!allocate_from->empty());
+  }
+  CHECK(!allocate_to->empty());
+}
 
+// SizeClassAllocator64 -- allocator for 64-bit address space.
+//
 // Space: a portion of address space of kSpaceSize bytes starting at
 // a fixed address (kSpaceBeg). Both constants are powers of two and
 // kSpaceBeg is kSpaceSize-aligned.
@@ -133,8 +150,9 @@
   }
 
   void *Allocate(uptr size, uptr alignment) {
+    if (size < alignment) size = alignment;
     CHECK(CanAllocate(size, alignment));
-    return AllocateBySizeClass(SizeClassMap::ClassID(size));
+    return AllocateBySizeClass(ClassID(size));
   }
 
   void Deallocate(void *p) {
@@ -150,18 +168,7 @@
     if (region->free_list.empty()) {
       PopulateFreeList(class_id, region);
     }
-    CHECK(!region->free_list.empty());
-    uptr count = SizeClassMap::MaxCached(class_id);
-    if (region->free_list.size() <= count) {
-      free_list->append_front(&region->free_list);
-    } else {
-      for (uptr i = 0; i < count; i++) {
-        AllocatorListNode *node = region->free_list.front();
-        region->free_list.pop_front();
-        free_list->push_front(node);
-      }
-    }
-    CHECK(!free_list->empty());
+    BulkMove(SizeClassMap::MaxCached(class_id), &region->free_list, free_list);
   }
 
   // Swallow the entire free_list for the given class_id.
@@ -186,7 +193,7 @@
     uptr chunk_idx = GetChunkIdx((uptr)p, size);
     uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
     uptr begin = reg_beg + chunk_idx * size;
-    return (void*)begin;
+    return reinterpret_cast<void*>(begin);
   }
 
   static uptr GetActuallyAllocatedSize(void *p) {
@@ -217,17 +224,16 @@
   }
 
   static uptr AllocBeg()  { return kSpaceBeg; }
-  static uptr AllocEnd()  { return kSpaceBeg  + kSpaceSize + AdditionalSize(); }
   static uptr AllocSize() { return kSpaceSize + AdditionalSize(); }
 
-  static const uptr kNumClasses = 256;  // Power of two <= 256
   typedef SizeClassMap SizeClassMapT;
+  static const uptr kNumClasses = SizeClassMap::kNumClasses;  // 2^k <= 256
 
  private:
-  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
-  COMPILER_CHECK(kNumClasses <= SizeClassMap::kNumClasses);
   static const uptr kRegionSize = kSpaceSize / kNumClasses;
-  COMPILER_CHECK((kRegionSize >> 32) > 0);  // kRegionSize must be >= 2^32.
+  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
+  // kRegionSize must be >= 2^(SANITIZER_WORDSIZE / 2), i.e. 2^32 on 64-bit.
+  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
   // Populate the free list with at most this number of bytes at once
   // or with one element if its size is greater.
   static const uptr kPopulateSize = 1 << 18;
@@ -242,8 +248,9 @@
   COMPILER_CHECK(sizeof(RegionInfo) == kCacheLineSize);
 
   static uptr AdditionalSize() {
-    uptr res = sizeof(RegionInfo) * kNumClasses;
-    CHECK_EQ(res % kPageSize, 0);
+    uptr PageSize = GetPageSizeCached();
+    uptr res = Max(sizeof(RegionInfo) * kNumClasses, PageSize);
+    CHECK_EQ(res % PageSize, 0);
     return res;
   }
 
@@ -262,10 +269,10 @@
   }
 
   void PopulateFreeList(uptr class_id, RegionInfo *region) {
+    CHECK(region->free_list.empty());
     uptr size = SizeClassMap::Size(class_id);
     uptr beg_idx = region->allocated_user;
     uptr end_idx = beg_idx + kPopulateSize;
-    region->free_list.clear();
     uptr region_beg = kSpaceBeg + kRegionSize * class_id;
     uptr idx = beg_idx;
     uptr i = 0;
@@ -277,7 +284,12 @@
     } while (idx < end_idx);
     region->allocated_user += idx - beg_idx;
     region->allocated_meta += i * kMetadataSize;
-    CHECK_LT(region->allocated_user + region->allocated_meta, kRegionSize);
+    if (region->allocated_user + region->allocated_meta > kRegionSize) {
+      Printf("Out of memory. Dying.\n");
+      Printf("The process has exhausted %zuMB for size class %zu.\n",
+          kRegionSize / 1024 / 1024, size);
+      Die();
+    }
   }
 
   void *AllocateBySizeClass(uptr class_id) {
@@ -300,11 +312,198 @@
   }
 };
 
+// SizeClassAllocator32 -- allocator for 32-bit address space.
+// This allocator can theoretically be used on 64-bit arch, but there it is less
+// efficient than SizeClassAllocator64.
+//
+// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
+// be returned by MmapOrDie().
+//
+// Region:
+//   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
+// Since the regions are aligned by kRegionSize, there are exactly
+// kNumPossibleRegions possible regions in the address space and so we keep
+// a u8 array possible_regions_[kNumPossibleRegions] to store the size classes.
+// 0 size class means the region is not used by the allocator.
+//
+// One Region is used to allocate chunks of a single size class.
+// A Region looks like this:
+// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
+//
+// In order to avoid false sharing the objects of this class should be
+// cache-line aligned.
+template <const uptr kSpaceBeg, const u64 kSpaceSize,
+          const uptr kMetadataSize, class SizeClassMap>
+class SizeClassAllocator32 {
+ public:
+  // Don't need to call Init if the object is a global (i.e. zero-initialized).
+  void Init() {
+    internal_memset(this, 0, sizeof(*this));
+  }
+
+  bool CanAllocate(uptr size, uptr alignment) {
+    return size <= SizeClassMap::kMaxSize &&
+      alignment <= SizeClassMap::kMaxSize;
+  }
+
+  void *Allocate(uptr size, uptr alignment) {
+    if (size < alignment) size = alignment;
+    CHECK(CanAllocate(size, alignment));
+    return AllocateBySizeClass(ClassID(size));
+  }
+
+  void Deallocate(void *p) {
+    CHECK(PointerIsMine(p));
+    DeallocateBySizeClass(p, GetSizeClass(p));
+  }
+
+  void *GetMetaData(void *p) {
+    CHECK(PointerIsMine(p));
+    uptr mem = reinterpret_cast<uptr>(p);
+    uptr beg = ComputeRegionBeg(mem);
+    uptr size = SizeClassMap::Size(GetSizeClass(p));
+    u32 offset = mem - beg;
+    uptr n = offset / (u32)size;  // 32-bit division
+    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
+    return reinterpret_cast<void*>(meta);
+  }
+
+  // Allocate several chunks of the given class_id.
+  void BulkAllocate(uptr class_id, AllocatorFreeList *free_list) {
+    SizeClassInfo *sci = GetSizeClassInfo(class_id);
+    SpinMutexLock l(&sci->mutex);
+    EnsureSizeClassHasAvailableChunks(sci, class_id);
+    CHECK(!sci->free_list.empty());
+    BulkMove(SizeClassMap::MaxCached(class_id), &sci->free_list, free_list);
+  }
+
+  // Swallow the entire free_list for the given class_id.
+  void BulkDeallocate(uptr class_id, AllocatorFreeList *free_list) {
+    SizeClassInfo *sci = GetSizeClassInfo(class_id);
+    SpinMutexLock l(&sci->mutex);
+    sci->free_list.append_front(free_list);
+  }
+
+  bool PointerIsMine(void *p) {
+    return possible_regions_[ComputeRegionId(reinterpret_cast<uptr>(p))] != 0;
+  }
+
+  uptr GetSizeClass(void *p) {
+    return possible_regions_[ComputeRegionId(reinterpret_cast<uptr>(p))] - 1;
+  }
+
+  void *GetBlockBegin(void *p) {
+    CHECK(PointerIsMine(p));
+    uptr mem = reinterpret_cast<uptr>(p);
+    uptr beg = ComputeRegionBeg(mem);
+    uptr size = SizeClassMap::Size(GetSizeClass(p));
+    u32 offset = mem - beg;
+    u32 n = offset / (u32)size;  // 32-bit division
+    uptr res = beg + (n * (u32)size);
+    return reinterpret_cast<void*>(res);
+  }
+
+  uptr GetActuallyAllocatedSize(void *p) {
+    CHECK(PointerIsMine(p));
+    return SizeClassMap::Size(GetSizeClass(p));
+  }
+
+  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
+
+  uptr TotalMemoryUsed() {
+    // No need to lock here.
+    uptr res = 0;
+    for (uptr i = 0; i < kNumPossibleRegions; i++)
+      if (possible_regions_[i])
+        res += kRegionSize;
+    return res;
+  }
+
+  void TestOnlyUnmap() {
+    for (uptr i = 0; i < kNumPossibleRegions; i++)
+      if (possible_regions_[i])
+        UnmapOrDie(reinterpret_cast<void*>(i * kRegionSize), kRegionSize);
+  }
+
+  typedef SizeClassMap SizeClassMapT;
+  static const uptr kNumClasses = SizeClassMap::kNumClasses;  // 2^k <= 128
+
+ private:
+  static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
+  static const uptr kRegionSize = 1 << kRegionSizeLog;
+  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
+  COMPILER_CHECK(kNumClasses <= 128);
+
+  struct SizeClassInfo {
+    SpinMutex mutex;
+    AllocatorFreeList free_list;
+    char padding[kCacheLineSize - sizeof(uptr) - sizeof(AllocatorFreeList)];
+  };
+  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
+
+  uptr ComputeRegionId(uptr mem) {
+    uptr res = mem >> kRegionSizeLog;
+    CHECK_LT(res, kNumPossibleRegions);
+    return res;
+  }
+
+  uptr ComputeRegionBeg(uptr mem) {
+    return mem & ~(kRegionSize - 1);
+  }
+
+  uptr AllocateRegion(uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
+    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
+                                      "SizeClassAllocator32"));
+    CHECK_EQ(0U, (res & (kRegionSize - 1)));
+    CHECK_EQ(0U, possible_regions_[ComputeRegionId(res)]);
+    possible_regions_[ComputeRegionId(res)] = class_id + 1;
+    return res;
+  }
+
+  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
+    return &size_class_info_array_[class_id];
+  }
+
+  void EnsureSizeClassHasAvailableChunks(SizeClassInfo *sci, uptr class_id) {
+    if (!sci->free_list.empty()) return;
+    uptr size = SizeClassMap::Size(class_id);
+    uptr reg = AllocateRegion(class_id);
+    uptr n_chunks = kRegionSize / (size + kMetadataSize);
+    for (uptr i = reg; i < reg + n_chunks * size; i += size)
+      sci->free_list.push_back(reinterpret_cast<AllocatorListNode*>(i));
+  }
+
+  void *AllocateBySizeClass(uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
+    SizeClassInfo *sci = GetSizeClassInfo(class_id);
+    SpinMutexLock l(&sci->mutex);
+    EnsureSizeClassHasAvailableChunks(sci, class_id);
+    CHECK(!sci->free_list.empty());
+    AllocatorListNode *node = sci->free_list.front();
+    sci->free_list.pop_front();
+    return reinterpret_cast<void*>(node);
+  }
+
+  void DeallocateBySizeClass(void *p, uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
+    SizeClassInfo *sci = GetSizeClassInfo(class_id);
+    SpinMutexLock l(&sci->mutex);
+    sci->free_list.push_front(reinterpret_cast<AllocatorListNode*>(p));
+  }
+
+  u8 possible_regions_[kNumPossibleRegions];
+  SizeClassInfo size_class_info_array_[kNumClasses];
+};
+
 // Objects of this type should be used as local caches for SizeClassAllocator64.
 // Since the typical use of this class is to have one object per thread in TLS,
 // is has to be POD.
-template<const uptr kNumClasses, class SizeClassAllocator>
+template<class SizeClassAllocator>
 struct SizeClassAllocatorLocalCache {
+  typedef SizeClassAllocator Allocator;
+  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
   // Don't need to call Init if the object is a global (i.e. zero-initialized).
   void Init() {
     internal_memset(this, 0, sizeof(*this));
@@ -361,17 +560,18 @@
  public:
   void Init() {
     internal_memset(this, 0, sizeof(*this));
+    page_size_ = GetPageSizeCached();
   }
   void *Allocate(uptr size, uptr alignment) {
     CHECK(IsPowerOfTwo(alignment));
     uptr map_size = RoundUpMapSize(size);
-    if (alignment > kPageSize)
+    if (alignment > page_size_)
       map_size += alignment;
     if (map_size < size) return 0;  // Overflow.
     uptr map_beg = reinterpret_cast<uptr>(
         MmapOrDie(map_size, "LargeMmapAllocator"));
     uptr map_end = map_beg + map_size;
-    uptr res = map_beg + kPageSize;
+    uptr res = map_beg + page_size_;
     if (res & (alignment - 1))  // Align.
       res += alignment - (res & (alignment - 1));
     CHECK_EQ(0, res & (alignment - 1));
@@ -418,7 +618,7 @@
 
   bool PointerIsMine(void *p) {
     // Fast check.
-    if ((reinterpret_cast<uptr>(p) % kPageSize) != 0) return false;
+    if ((reinterpret_cast<uptr>(p) & (page_size_ - 1))) return false;
     SpinMutexLock l(&mutex_);
     for (Header *l = list_; l; l = l->next) {
       if (GetUser(l) == p) return true;
@@ -427,10 +627,10 @@
   }
 
   uptr GetActuallyAllocatedSize(void *p) {
-    return RoundUpMapSize(GetHeader(p)->size) - kPageSize;
+    return RoundUpMapSize(GetHeader(p)->size) - page_size_;
   }
 
-  // At least kPageSize/2 metadata bytes is available.
+  // At least page_size_/2 metadata bytes are available.
   void *GetMetaData(void *p) {
     return GetHeader(p) + 1;
   }
@@ -454,17 +654,22 @@
     Header *prev;
   };
 
-  Header *GetHeader(uptr p) { return reinterpret_cast<Header*>(p - kPageSize); }
+  Header *GetHeader(uptr p) {
+    CHECK_EQ(p % page_size_, 0);
+    return reinterpret_cast<Header*>(p - page_size_);
+  }
   Header *GetHeader(void *p) { return GetHeader(reinterpret_cast<uptr>(p)); }
 
   void *GetUser(Header *h) {
-    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + kPageSize);
+    CHECK_EQ((uptr)h % page_size_, 0);
+    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
   }
 
   uptr RoundUpMapSize(uptr size) {
-    return RoundUpTo(size, kPageSize) + kPageSize;
+    return RoundUpTo(size, page_size_) + page_size_;
   }
 
+  uptr page_size_;
   Header *list_;
   SpinMutex mutex_;
 };
@@ -573,3 +778,4 @@
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_ALLOCATOR_H
+
diff --git a/lib/sanitizer_common/sanitizer_common.cc b/lib/sanitizer_common/sanitizer_common.cc
index 614b7b9..ca1f6bd 100644
--- a/lib/sanitizer_common/sanitizer_common.cc
+++ b/lib/sanitizer_common/sanitizer_common.cc
@@ -16,7 +16,16 @@
 
 namespace __sanitizer {
 
-static fd_t report_fd = 2;  // By default, dump to stderr.
+uptr GetPageSizeCached() {
+  static uptr PageSize;
+  if (!PageSize)
+    PageSize = GetPageSize();
+  return PageSize;
+}
+
+// By default, dump to stderr. If report_fd is kInvalidFd, try to obtain file
+// descriptor by opening file in report_path.
+static fd_t report_fd = kStderrFd;
 static char report_path[4096];  // Set via __sanitizer_set_report_path.
 
 static void (*DieCallback)(void);
@@ -46,18 +55,27 @@
   Die();
 }
 
+static void MaybeOpenReportFile() {
+  if (report_fd != kInvalidFd)
+    return;
+  fd_t fd = internal_open(report_path, true);
+  if (fd == kInvalidFd) {
+    report_fd = kStderrFd;
+    Report("ERROR: Can't open file: %s\n", report_path);
+    Die();
+  }
+  report_fd = fd;
+}
+
+bool PrintsToTty() {
+  MaybeOpenReportFile();
+  return internal_isatty(report_fd);
+}
+
 void RawWrite(const char *buffer) {
   static const char *kRawWriteError = "RawWrite can't output requested buffer!";
   uptr length = (uptr)internal_strlen(buffer);
-  if (report_fd == kInvalidFd) {
-    fd_t fd = internal_open(report_path, true);
-    if (fd == kInvalidFd) {
-      report_fd = 2;
-      Report("ERROR: Can't open file: %s\n", report_path);
-      Die();
-    }
-    report_fd = fd;
-  }
+  MaybeOpenReportFile();
   if (length != internal_write(report_fd, buffer, length)) {
     internal_write(report_fd, kRawWriteError, internal_strlen(kRawWriteError));
     Die();
@@ -66,7 +84,8 @@
 
 uptr ReadFileToBuffer(const char *file_name, char **buff,
                       uptr *buff_size, uptr max_len) {
-  const uptr kMinFileLen = kPageSize;
+  uptr PageSize = GetPageSizeCached();
+  uptr kMinFileLen = PageSize;
   uptr read_len = 0;
   *buff = 0;
   *buff_size = 0;
@@ -80,8 +99,8 @@
     // Read up to one page at a time.
     read_len = 0;
     bool reached_eof = false;
-    while (read_len + kPageSize <= size) {
-      uptr just_read = internal_read(fd, *buff + read_len, kPageSize);
+    while (read_len + PageSize <= size) {
+      uptr just_read = internal_read(fd, *buff + read_len, PageSize);
       if (just_read == 0) {
         reached_eof = true;
         break;
@@ -136,18 +155,56 @@
   }
 }
 
+// We want to map a chunk of address space aligned to 'alignment'.
+// We do it by mapping a bit more and then unmapping redundant pieces.
+// We probably can do it with fewer syscalls in some OS-dependent way.
+void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type) {
+// uptr PageSize = GetPageSizeCached();
+  CHECK(IsPowerOfTwo(size));
+  CHECK(IsPowerOfTwo(alignment));
+  uptr map_size = size + alignment;
+  uptr map_res = (uptr)MmapOrDie(map_size, mem_type);
+  uptr map_end = map_res + map_size;
+  uptr res = map_res;
+  if (res & (alignment - 1))  // Not aligned.
+    res = (map_res + alignment) & ~(alignment - 1);
+  uptr end = res + size;
+  if (res != map_res)
+    UnmapOrDie((void*)map_res, res - map_res);
+  if (end != map_end)
+    UnmapOrDie((void*)end, map_end - end);
+  return (void*)res;
+}
+
 }  // namespace __sanitizer
 
+using namespace __sanitizer;  // NOLINT
+
+extern "C" {
 void __sanitizer_set_report_path(const char *path) {
   if (!path) return;
   uptr len = internal_strlen(path);
-  if (len > sizeof(__sanitizer::report_path) - 100) {
+  if (len > sizeof(report_path) - 100) {
     Report("ERROR: Path is too long: %c%c%c%c%c%c%c%c...\n",
            path[0], path[1], path[2], path[3],
            path[4], path[5], path[6], path[7]);
     Die();
   }
-  internal_snprintf(__sanitizer::report_path,
-                    sizeof(__sanitizer::report_path), "%s.%d", path, GetPid());
-  __sanitizer::report_fd = kInvalidFd;
+  internal_snprintf(report_path, sizeof(report_path), "%s.%d", path, GetPid());
+  report_fd = kInvalidFd;
 }
+
+void __sanitizer_set_report_fd(int fd) {
+  if (report_fd != kStdoutFd &&
+      report_fd != kStderrFd &&
+      report_fd != kInvalidFd)
+    internal_close(report_fd);
+  report_fd = fd;
+}
+
+void NOINLINE __sanitizer_sandbox_on_notify(void *reserved) {
+  (void)reserved;
+  PrepareForSandboxing();
+}
+
+}  // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_common.h b/lib/sanitizer_common/sanitizer_common.h
index 323b066..77fcc5c 100644
--- a/lib/sanitizer_common/sanitizer_common.h
+++ b/lib/sanitizer_common/sanitizer_common.h
@@ -21,17 +21,18 @@
 namespace __sanitizer {
 
 // Constants.
-const uptr kWordSize = __WORDSIZE / 8;
+const uptr kWordSize = SANITIZER_WORDSIZE / 8;
 const uptr kWordSizeInBits = 8 * kWordSize;
-const uptr kPageSizeBits = 12;
-const uptr kPageSize = 1UL << kPageSizeBits;
-const uptr kCacheLineSize = 64;
-#ifndef _WIN32
-const uptr kMmapGranularity = kPageSize;
+
+#if defined(__powerpc__) || defined(__powerpc64__)
+const uptr kCacheLineSize = 128;
 #else
-const uptr kMmapGranularity = 1UL << 16;
+const uptr kCacheLineSize = 64;
 #endif
 
+uptr GetPageSize();
+uptr GetPageSizeCached();
+uptr GetMmapGranularity();
 // Threads
 int GetPid();
 uptr GetTid();
@@ -44,15 +45,14 @@
 void UnmapOrDie(void *addr, uptr size);
 void *MmapFixedNoReserve(uptr fixed_addr, uptr size);
 void *Mprotect(uptr fixed_addr, uptr size);
+// Map aligned chunk of address space; size and alignment are powers of two.
+void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type);
 // Used to check if we can map shadow memory to a fixed location.
 bool MemoryRangeIsAvailable(uptr range_start, uptr range_end);
 
 // Internal allocator
 void *InternalAlloc(uptr size);
 void InternalFree(void *p);
-// Given the pointer p into a valid allocated block,
-// returns a pointer to the beginning of the block.
-void *InternalAllocBlock(void *p);
 
 // InternalScopedBuffer can be used instead of large stack arrays to
 // keep frame size low.
@@ -98,6 +98,7 @@
 
 // IO
 void RawWrite(const char *buffer);
+bool PrintsToTty();
 void Printf(const char *format, ...);
 void Report(const char *format, ...);
 void SetPrintfAndReportCallback(void (*callback)(const char *));
@@ -116,11 +117,13 @@
 // OS
 void DisableCoreDumper();
 void DumpProcessMap();
+bool FileExists(const char *filename);
 const char *GetEnv(const char *name);
 const char *GetPwd();
 void ReExec();
 bool StackSizeIsUnlimited();
 void SetStackSizeLimitInBytes(uptr limit);
+void PrepareForSandboxing();
 
 // Other
 void SleepForSeconds(int seconds);
@@ -135,6 +138,13 @@
 void NORETURN SANITIZER_INTERFACE_ATTRIBUTE
 CheckFailed(const char *file, int line, const char *cond, u64 v1, u64 v2);
 
+// Set the name of the current thread to 'name', return true on success.
+// The name may be truncated to a system-dependent limit.
+bool SanitizerSetThreadName(const char *name);
+// Get the name of the current thread (no more than max_len bytes),
+// return true on success. name should have space for at least max_len+1 bytes.
+bool SanitizerGetThreadName(char *name, int max_len);
+
 // Specific tools may override behavior of "Die" and "CheckFailed" functions
 // to do tool-specific job.
 void SetDieCallback(void (*callback)(void));
@@ -172,7 +182,7 @@
   return (c >= 'A' && c <= 'Z') ? (c + 'a' - 'A') : c;
 }
 
-#if __WORDSIZE == 64
+#if SANITIZER_WORDSIZE == 64
 # define FIRST_32_SECOND_64(a, b) (b)
 #else
 # define FIRST_32_SECOND_64(a, b) (a)
diff --git a/lib/sanitizer_common/sanitizer_internal_defs.h b/lib/sanitizer_common/sanitizer_internal_defs.h
index 9ace42a..2e56fac 100644
--- a/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -24,8 +24,7 @@
 #define WEAK SANITIZER_WEAK_ATTRIBUTE
 
 // Platform-specific defs.
-#if defined(_WIN32)
-typedef unsigned long    DWORD;  // NOLINT
+#if defined(_MSC_VER)
 # define ALWAYS_INLINE __declspec(forceinline)
 // FIXME(timurrrr): do we need this on Windows?
 # define ALIAS(x)
@@ -35,7 +34,11 @@
 # define NORETURN __declspec(noreturn)
 # define THREADLOCAL   __declspec(thread)
 # define NOTHROW
-#else  // _WIN32
+# define LIKELY(x) (x)
+# define UNLIKELY(x) (x)
+# define UNUSED
+# define USED
+#else  // _MSC_VER
 # define ALWAYS_INLINE __attribute__((always_inline))
 # define ALIAS(x) __attribute__((alias(x)))
 # define ALIGNED(x) __attribute__((aligned(x)))
@@ -43,22 +46,15 @@
 # define NOINLINE __attribute__((noinline))
 # define NORETURN  __attribute__((noreturn))
 # define THREADLOCAL   __thread
-# ifdef __cplusplus
-#   define NOTHROW throw()
-# else
-#   define NOTHROW __attribute__((__nothrow__))
-#endif
-#endif  // _WIN32
-
-// We have no equivalent of these on Windows.
-#ifndef _WIN32
+# define NOTHROW throw()
 # define LIKELY(x)     __builtin_expect(!!(x), 1)
 # define UNLIKELY(x)   __builtin_expect(!!(x), 0)
 # define UNUSED __attribute__((unused))
 # define USED __attribute__((used))
-#endif
+#endif  // _MSC_VER
 
 #if defined(_WIN32)
+typedef unsigned long DWORD;  // NOLINT
 typedef DWORD thread_return_t;
 # define THREAD_CALLING_CONV __stdcall
 #else  // _WIN32
@@ -67,15 +63,11 @@
 #endif  // _WIN32
 typedef thread_return_t (THREAD_CALLING_CONV *thread_callback_t)(void* arg);
 
-// If __WORDSIZE was undefined by the platform, define it in terms of the
-// compiler built-ins __LP64__ and _WIN64.
-#ifndef __WORDSIZE
-# if __LP64__ || defined(_WIN64)
-#  define __WORDSIZE 64
-# else
-#  define __WORDSIZE 32
-#  endif
-#endif  // __WORDSIZE
+#if __LP64__ || defined(_WIN64)
+#  define SANITIZER_WORDSIZE 64
+#else
+#  define SANITIZER_WORDSIZE 32
+#endif
 
 // NOTE: Functions below must be defined in each run-time.
 namespace __sanitizer {
@@ -149,13 +141,13 @@
 // have stdint.h (like in Visual Studio 9).
 #undef __INT64_C
 #undef __UINT64_C
-#if __WORDSIZE == 64
+#if SANITIZER_WORDSIZE == 64
 # define __INT64_C(c)  c ## L
 # define __UINT64_C(c) c ## UL
 #else
 # define __INT64_C(c)  c ## LL
 # define __UINT64_C(c) c ## ULL
-#endif  // __WORDSIZE == 64
+#endif  // SANITIZER_WORDSIZE == 64
 #undef INT32_MIN
 #define INT32_MIN              (-2147483647-1)
 #undef INT32_MAX
diff --git a/lib/sanitizer_common/sanitizer_libc.h b/lib/sanitizer_common/sanitizer_libc.h
index ec0b061..7979483 100644
--- a/lib/sanitizer_common/sanitizer_libc.h
+++ b/lib/sanitizer_common/sanitizer_libc.h
@@ -55,7 +55,11 @@
 // I/O
 typedef int fd_t;
 const fd_t kInvalidFd = -1;
+const fd_t kStdinFd = 0;
+const fd_t kStdoutFd = 1;
+const fd_t kStderrFd = 2;
 int internal_close(fd_t fd);
+int internal_isatty(fd_t fd);
 fd_t internal_open(const char *filename, bool write);
 uptr internal_read(fd_t fd, void *buf, uptr count);
 uptr internal_write(fd_t fd, const void *buf, uptr count);
diff --git a/lib/sanitizer_common/sanitizer_linux.cc b/lib/sanitizer_common/sanitizer_linux.cc
index 6be90ff..5be76e9 100644
--- a/lib/sanitizer_common/sanitizer_linux.cc
+++ b/lib/sanitizer_common/sanitizer_linux.cc
@@ -16,6 +16,7 @@
 #include "sanitizer_common.h"
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_libc.h"
+#include "sanitizer_mutex.h"
 #include "sanitizer_placement_new.h"
 #include "sanitizer_procmaps.h"
 
@@ -30,13 +31,23 @@
 #include <sys/types.h>
 #include <unistd.h>
 #include <errno.h>
+#include <sys/prctl.h>
+
+// Are we using 32-bit or 64-bit syscalls?
+// x32 (which defines __x86_64__) has SANITIZER_WORDSIZE == 32
+// but it still needs to use 64-bit syscalls.
+#if defined(__x86_64__) || SANITIZER_WORDSIZE == 64
+# define SANITIZER_LINUX_USES_64BIT_SYSCALLS 1
+#else
+# define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0
+#endif
 
 namespace __sanitizer {
 
 // --------------- sanitizer_libc.h
 void *internal_mmap(void *addr, uptr length, int prot, int flags,
                     int fd, u64 offset) {
-#if __WORDSIZE == 64
+#if SANITIZER_LINUX_USES_64BIT_SYSCALLS
   return (void *)syscall(__NR_mmap, addr, length, prot, flags, fd, offset);
 #else
   return (void *)syscall(__NR_mmap2, addr, length, prot, flags, fd, offset);
@@ -69,7 +80,7 @@
 }
 
 uptr internal_filesize(fd_t fd) {
-#if __WORDSIZE == 64
+#if SANITIZER_LINUX_USES_64BIT_SYSCALLS
   struct stat st;
   if (syscall(__NR_fstat, fd, &st))
     return -1;
@@ -94,6 +105,20 @@
 }
 
 // ----------------- sanitizer_common.h
+bool FileExists(const char *filename) {
+#if SANITIZER_LINUX_USES_64BIT_SYSCALLS
+  struct stat st;
+  if (syscall(__NR_stat, filename, &st))
+    return false;
+#else
+  struct stat64 st;
+  if (syscall(__NR_stat64, filename, &st))
+    return false;
+#endif
+  // Sanity check: filename is a regular file.
+  return S_ISREG(st.st_mode);
+}
+
 uptr GetTid() {
   return syscall(__NR_gettid);
 }
@@ -193,22 +218,69 @@
   execv(argv[0], argv.data());
 }
 
+void PrepareForSandboxing() {
+  // Some kinds of sandboxes may forbid filesystem access, so we won't be able
+  // to read the file mappings from /proc/self/maps. Luckily, the process
+  // won't be able to load additional libraries either, so it's fine to use
+  // the cached mappings.
+  MemoryMappingLayout::CacheMemoryMappings();
+}
+
 // ----------------- sanitizer_procmaps.h
+// Linker initialized.
+ProcSelfMapsBuff MemoryMappingLayout::cached_proc_self_maps_;
+StaticSpinMutex MemoryMappingLayout::cache_lock_;  // Linker initialized.
+
 MemoryMappingLayout::MemoryMappingLayout() {
-  proc_self_maps_buff_len_ =
-      ReadFileToBuffer("/proc/self/maps", &proc_self_maps_buff_,
-                       &proc_self_maps_buff_mmaped_size_, 1 << 26);
-  CHECK_GT(proc_self_maps_buff_len_, 0);
-  // internal_write(2, proc_self_maps_buff_, proc_self_maps_buff_len_);
+  proc_self_maps_.len =
+      ReadFileToBuffer("/proc/self/maps", &proc_self_maps_.data,
+                       &proc_self_maps_.mmaped_size, 1 << 26);
+  if (proc_self_maps_.mmaped_size == 0) {
+    LoadFromCache();
+    CHECK_GT(proc_self_maps_.len, 0);
+  }
+  // internal_write(2, proc_self_maps_.data, proc_self_maps_.len);
   Reset();
+  // FIXME: in the future we may want to cache the mappings on demand only.
+  CacheMemoryMappings();
 }
 
 MemoryMappingLayout::~MemoryMappingLayout() {
-  UnmapOrDie(proc_self_maps_buff_, proc_self_maps_buff_mmaped_size_);
+  // Only unmap the buffer if it is different from the cached one. Otherwise
+  // it will be unmapped when the cache is refreshed.
+  if (proc_self_maps_.data != cached_proc_self_maps_.data) {
+    UnmapOrDie(proc_self_maps_.data, proc_self_maps_.mmaped_size);
+  }
 }
 
 void MemoryMappingLayout::Reset() {
-  current_ = proc_self_maps_buff_;
+  current_ = proc_self_maps_.data;
+}
+
+// static
+void MemoryMappingLayout::CacheMemoryMappings() {
+  SpinMutexLock l(&cache_lock_);
+  // Don't invalidate the cache if the mappings are unavailable.
+  ProcSelfMapsBuff old_proc_self_maps;
+  old_proc_self_maps = cached_proc_self_maps_;
+  cached_proc_self_maps_.len =
+      ReadFileToBuffer("/proc/self/maps", &cached_proc_self_maps_.data,
+                       &cached_proc_self_maps_.mmaped_size, 1 << 26);
+  if (cached_proc_self_maps_.mmaped_size == 0) {
+    cached_proc_self_maps_ = old_proc_self_maps;
+  } else {
+    if (old_proc_self_maps.mmaped_size) {
+      UnmapOrDie(old_proc_self_maps.data,
+                 old_proc_self_maps.mmaped_size);
+    }
+  }
+}
+
+void MemoryMappingLayout::LoadFromCache() {
+  SpinMutexLock l(&cache_lock_);
+  if (cached_proc_self_maps_.data) {
+    proc_self_maps_ = cached_proc_self_maps_;
+  }
 }
 
 // Parse a hex value in str and update str.
@@ -242,7 +314,7 @@
 
 bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
                                char filename[], uptr filename_size) {
-  char *last = proc_self_maps_buff_ + proc_self_maps_buff_len_;
+  char *last = proc_self_maps_.data + proc_self_maps_.len;
   if (current_ >= last) return false;
   uptr dummy;
   if (!start) start = &dummy;
@@ -293,6 +365,19 @@
   return IterateForObjectNameAndOffset(addr, offset, filename, filename_size);
 }
 
+bool SanitizerSetThreadName(const char *name) {
+  return 0 == prctl(PR_SET_NAME, (unsigned long)name, 0, 0, 0);  // NOLINT
+}
+
+bool SanitizerGetThreadName(char *name, int max_len) {
+  char buff[17];
+  if (prctl(PR_GET_NAME, (unsigned long)buff, 0, 0, 0))  // NOLINT
+    return false;
+  internal_strncpy(name, buff, max_len);
+  name[max_len] = 0;
+  return true;
+}
+
 }  // namespace __sanitizer
 
 #endif  // __linux__
diff --git a/lib/sanitizer_common/sanitizer_mac.cc b/lib/sanitizer_common/sanitizer_mac.cc
index 6be53cd..e156eaa 100644
--- a/lib/sanitizer_common/sanitizer_mac.cc
+++ b/lib/sanitizer_common/sanitizer_mac.cc
@@ -80,6 +80,14 @@
 }
 
 // ----------------- sanitizer_common.h
+bool FileExists(const char *filename) {
+  struct stat st;
+  if (stat(filename, &st))
+    return false;
+  // Sanity check: filename is a regular file.
+  return S_ISREG(st.st_mode);
+}
+
 uptr GetTid() {
   return reinterpret_cast<uptr>(pthread_self());
 }
@@ -118,6 +126,10 @@
   UNIMPLEMENTED();
 }
 
+void PrepareForSandboxing() {
+  // Nothing here for now.
+}
+
 // ----------------- sanitizer_procmaps.h
 
 MemoryMappingLayout::MemoryMappingLayout() {
@@ -154,6 +166,15 @@
   current_filetype_ = 0;
 }
 
+// static
+void MemoryMappingLayout::CacheMemoryMappings() {
+  // No-op on Mac for now.
+}
+
+void MemoryMappingLayout::LoadFromCache() {
+  // No-op on Mac for now.
+}
+
 // Next and NextSegmentLoad were inspired by base/sysinfo.cc in
 // Google Perftools, http://code.google.com/p/google-perftools.
 
diff --git a/lib/sanitizer_common/sanitizer_placement_new.h b/lib/sanitizer_common/sanitizer_placement_new.h
index f133a6f..c0b85e1 100644
--- a/lib/sanitizer_common/sanitizer_placement_new.h
+++ b/lib/sanitizer_common/sanitizer_placement_new.h
@@ -19,7 +19,7 @@
 #include "sanitizer_internal_defs.h"
 
 namespace __sanitizer {
-#if (__WORDSIZE == 64) || defined(__APPLE__)
+#if (SANITIZER_WORDSIZE == 64) || defined(__APPLE__)
 typedef uptr operator_new_ptr_type;
 #else
 typedef u32 operator_new_ptr_type;
diff --git a/lib/sanitizer_common/sanitizer_posix.cc b/lib/sanitizer_common/sanitizer_posix.cc
index 30325db..d5cf999 100644
--- a/lib/sanitizer_common/sanitizer_posix.cc
+++ b/lib/sanitizer_common/sanitizer_posix.cc
@@ -32,6 +32,13 @@
 namespace __sanitizer {
 
 // ------------- sanitizer_common.h
+uptr GetPageSize() {
+  return sysconf(_SC_PAGESIZE);
+}
+
+uptr GetMmapGranularity() {
+  return GetPageSize();
+}
 
 int GetPid() {
   return getpid();
@@ -42,7 +49,7 @@
 }
 
 void *MmapOrDie(uptr size, const char *mem_type) {
-  size = RoundUpTo(size, kPageSize);
+  size = RoundUpTo(size, GetPageSizeCached());
   void *res = internal_mmap(0, size,
                             PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -74,10 +81,16 @@
 }
 
 void *MmapFixedNoReserve(uptr fixed_addr, uptr size) {
-  return internal_mmap((void*)fixed_addr, size,
-                      PROT_READ | PROT_WRITE,
-                      MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE,
-                      -1, 0);
+  uptr PageSize = GetPageSizeCached();
+  void *p = internal_mmap((void*)(fixed_addr & ~(PageSize - 1)),
+      RoundUpTo(size, PageSize),
+      PROT_READ | PROT_WRITE,
+      MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE,
+      -1, 0);
+  if (p == (void*)-1)
+    Report("ERROR: Failed to allocate 0x%zx (%zd) bytes at address %p (%d)\n",
+           size, size, fixed_addr, errno);
+  return p;
 }
 
 void *Mprotect(uptr fixed_addr, uptr size) {
@@ -93,7 +106,7 @@
   uptr fsize = internal_filesize(fd);
   CHECK_NE(fsize, (uptr)-1);
   CHECK_GT(fsize, 0);
-  *buff_size = RoundUpTo(fsize, kPageSize);
+  *buff_size = RoundUpTo(fsize, GetPageSizeCached());
   void *map = internal_mmap(0, *buff_size, PROT_READ, MAP_PRIVATE, fd, 0);
   return (map == MAP_FAILED) ? 0 : map;
 }
@@ -156,7 +169,10 @@
   struct rlimit rlim;
   rlim.rlim_cur = limit;
   rlim.rlim_max = limit;
-  CHECK_EQ(0, setrlimit(RLIMIT_STACK, &rlim));
+  if (setrlimit(RLIMIT_STACK, &rlim)) {
+    Report("setrlimit() failed %d\n", errno);
+    Die();
+  }
   CHECK(!StackSizeIsUnlimited());
 }
 
@@ -184,6 +200,10 @@
 #endif
 }
 
+int internal_isatty(fd_t fd) {
+  return isatty(fd);
+}
+
 }  // namespace __sanitizer
 
 #endif  // __linux__ || __APPLE_
diff --git a/lib/sanitizer_common/sanitizer_printf.cc b/lib/sanitizer_common/sanitizer_printf.cc
index 93d4fa9..b671298 100644
--- a/lib/sanitizer_common/sanitizer_printf.cc
+++ b/lib/sanitizer_common/sanitizer_printf.cc
@@ -45,7 +45,12 @@
     num_buffer[pos++] = num % base;
     num /= base;
   } while (num > 0);
-  while (pos < minimal_num_length) num_buffer[pos++] = 0;
+  if (pos < minimal_num_length) {
+    // Make sure compiler doesn't insert call to memset here.
+    internal_memset(&num_buffer[pos], 0,
+                    sizeof(num_buffer[0]) * (minimal_num_length - pos));
+    pos = minimal_num_length;
+  }
   int result = 0;
   while (pos-- > 0) {
     uptr digit = num_buffer[pos];
@@ -55,13 +60,16 @@
   return result;
 }
 
-static int AppendSignedDecimal(char **buff, const char *buff_end, s64 num) {
+static int AppendSignedDecimal(char **buff, const char *buff_end, s64 num,
+                               u8 minimal_num_length) {
   int result = 0;
   if (num < 0) {
     result += AppendChar(buff, buff_end, '-');
     num = -num;
+    if (minimal_num_length)
+      --minimal_num_length;
   }
-  result += AppendUnsigned(buff, buff_end, (u64)num, 10, 0);
+  result += AppendUnsigned(buff, buff_end, (u64)num, 10, minimal_num_length);
   return result;
 }
 
@@ -79,14 +87,14 @@
   int result = 0;
   result += AppendString(buff, buff_end, "0x");
   result += AppendUnsigned(buff, buff_end, ptr_value, 16,
-                           (__WORDSIZE == 64) ? 12 : 8);
+                           (SANITIZER_WORDSIZE == 64) ? 12 : 8);
   return result;
 }
 
 int VSNPrintf(char *buff, int buff_length,
               const char *format, va_list args) {
-  static const char *kPrintfFormatsHelp = "Supported Printf formats: "
-                                          "%%[z]{d,u,x}; %%p; %%s; %%c\n";
+  static const char *kPrintfFormatsHelp =
+    "Supported Printf formats: %%(0[0-9]*)?(z|ll)?{d,u,x}; %%p; %%s; %%c\n";
   RAW_CHECK(format);
   RAW_CHECK(buff_length > 0);
   const char *buff_end = &buff[buff_length - 1];
@@ -98,42 +106,55 @@
       continue;
     }
     cur++;
+    bool have_width = (*cur == '0');
+    int width = 0;
+    if (have_width) {
+      while (*cur >= '0' && *cur <= '9') {
+        have_width = true;
+        width = width * 10 + *cur++ - '0';
+      }
+    }
     bool have_z = (*cur == 'z');
     cur += have_z;
+    bool have_ll = !have_z && (cur[0] == 'l' && cur[1] == 'l');
+    cur += have_ll * 2;
     s64 dval;
     u64 uval;
+    bool have_flags = have_width | have_z | have_ll;
     switch (*cur) {
       case 'd': {
-        dval = have_z ? va_arg(args, sptr)
-                      : va_arg(args, int);
-        result += AppendSignedDecimal(&buff, buff_end, dval);
+        dval = have_ll ? va_arg(args, s64)
+             : have_z ? va_arg(args, sptr)
+             : va_arg(args, int);
+        result += AppendSignedDecimal(&buff, buff_end, dval, width);
         break;
       }
       case 'u':
       case 'x': {
-        uval = have_z ? va_arg(args, uptr)
-                      : va_arg(args, unsigned);
+        uval = have_ll ? va_arg(args, u64)
+             : have_z ? va_arg(args, uptr)
+             : va_arg(args, unsigned);
         result += AppendUnsigned(&buff, buff_end, uval,
-                                 (*cur == 'u') ? 10 : 16, 0);
+                                 (*cur == 'u') ? 10 : 16, width);
         break;
       }
       case 'p': {
-        RAW_CHECK_MSG(!have_z, kPrintfFormatsHelp);
+        RAW_CHECK_MSG(!have_flags, kPrintfFormatsHelp);
         result += AppendPointer(&buff, buff_end, va_arg(args, uptr));
         break;
       }
       case 's': {
-        RAW_CHECK_MSG(!have_z, kPrintfFormatsHelp);
+        RAW_CHECK_MSG(!have_flags, kPrintfFormatsHelp);
         result += AppendString(&buff, buff_end, va_arg(args, char*));
         break;
       }
       case 'c': {
-        RAW_CHECK_MSG(!have_z, kPrintfFormatsHelp);
+        RAW_CHECK_MSG(!have_flags, kPrintfFormatsHelp);
         result += AppendChar(&buff, buff_end, va_arg(args, int));
         break;
       }
       case '%' : {
-        RAW_CHECK_MSG(!have_z, kPrintfFormatsHelp);
+        RAW_CHECK_MSG(!have_flags, kPrintfFormatsHelp);
         result += AppendChar(&buff, buff_end, '%');
         break;
       }
@@ -153,7 +174,7 @@
 }
 
 void Printf(const char *format, ...) {
-  const int kLen = 1024 * 4;
+  const int kLen = 16 * 1024;
   InternalScopedBuffer<char> buffer(kLen);
   va_list args;
   va_start(args, format);
@@ -179,7 +200,7 @@
 
 // Like Printf, but prints the current PID before the output string.
 void Report(const char *format, ...) {
-  const int kLen = 1024 * 4;
+  const int kLen = 16 * 1024;
   InternalScopedBuffer<char> buffer(kLen);
   int needed_length = internal_snprintf(buffer.data(),
                                         kLen, "==%d== ", GetPid());
diff --git a/lib/sanitizer_common/sanitizer_procmaps.h b/lib/sanitizer_common/sanitizer_procmaps.h
index f4f6241..1b8ea7a 100644
--- a/lib/sanitizer_common/sanitizer_procmaps.h
+++ b/lib/sanitizer_common/sanitizer_procmaps.h
@@ -15,6 +15,7 @@
 #define SANITIZER_PROCMAPS_H
 
 #include "sanitizer_internal_defs.h"
+#include "sanitizer_mutex.h"
 
 namespace __sanitizer {
 
@@ -29,6 +30,14 @@
 };
 
 #else  // _WIN32
+#if defined(__linux__)
+struct ProcSelfMapsBuff {
+  char *data;
+  uptr mmaped_size;
+  uptr len;
+};
+#endif  // defined(__linux__)
+
 class MemoryMappingLayout {
  public:
   MemoryMappingLayout();
@@ -39,9 +48,14 @@
   // address 'addr'. Returns true on success.
   bool GetObjectNameAndOffset(uptr addr, uptr *offset,
                               char filename[], uptr filename_size);
+  // In some cases, e.g. when running under a sandbox on Linux, ASan is unable
+  // to obtain the memory mappings. It should fall back to pre-cached data
+  // instead of aborting.
+  static void CacheMemoryMappings();
   ~MemoryMappingLayout();
 
  private:
+  void LoadFromCache();
   // Default implementation of GetObjectNameAndOffset.
   // Quite slow, because it iterates through the whole process map for each
   // lookup.
@@ -73,10 +87,12 @@
   }
 
 # if defined __linux__
-  char *proc_self_maps_buff_;
-  uptr proc_self_maps_buff_mmaped_size_;
-  uptr proc_self_maps_buff_len_;
+  ProcSelfMapsBuff proc_self_maps_;
   char *current_;
+
+  // Static mappings cache.
+  static ProcSelfMapsBuff cached_proc_self_maps_;
+  static StaticSpinMutex cache_lock_;  // protects cached_proc_self_maps_.
 # elif defined __APPLE__
   template<u32 kLCSegment, typename SegmentCommand>
   bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset,
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.cc b/lib/sanitizer_common/sanitizer_stacktrace.cc
index 0a54e7c..7525895 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace.cc
@@ -33,7 +33,14 @@
   // Cancel Thumb bit.
   pc = pc & (~1);
 #endif
+#if defined(__powerpc__) || defined(__powerpc64__)
+  // PCs are always 4 byte aligned.
+  return pc - 4;
+#elif defined(__sparc__)
+  return pc - 8;
+#else
   return pc - 1;
+#endif
 }
 
 static void PrintStackFramePrefix(uptr frame_num, uptr pc) {
@@ -60,7 +67,7 @@
                             bool symbolize, const char *strip_file_prefix,
                             SymbolizeCallback symbolize_callback ) {
   MemoryMappingLayout proc_maps;
-  InternalScopedBuffer<char> buff(kPageSize * 2);
+  InternalScopedBuffer<char> buff(GetPageSizeCached() * 2);
   InternalScopedBuffer<AddressInfo> addr_frames(64);
   uptr frame_num = 0;
   for (uptr i = 0; i < size && addr[i]; i++) {
@@ -123,18 +130,26 @@
                                  uptr stack_top, uptr stack_bottom) {
   CHECK(size == 0 && trace[0] == pc);
   size = 1;
-  uptr *frame = (uptr*)bp;
-  uptr *prev_frame = frame;
+  uhwptr *frame = (uhwptr *)bp;
+  uhwptr *prev_frame = frame;
   while (frame >= prev_frame &&
-         frame < (uptr*)stack_top - 2 &&
-         frame > (uptr*)stack_bottom &&
+         frame < (uhwptr *)stack_top - 2 &&
+         frame > (uhwptr *)stack_bottom &&
          size < max_size) {
-    uptr pc1 = frame[1];
+    uhwptr pc1 = frame[1];
     if (pc1 != pc) {
-      trace[size++] = pc1;
+      trace[size++] = (uptr) pc1;
     }
     prev_frame = frame;
-    frame = (uptr*)frame[0];
+    frame = (uhwptr *)frame[0];
+  }
+}
+
+void StackTrace::PopStackFrames(uptr count) {
+  CHECK(size >= count);
+  size -= count;
+  for (uptr i = 0; i < size; i++) {
+    trace[i] = trace[i + count];
   }
 }
 
@@ -143,7 +158,7 @@
 // the previous one, we record a 31-bit offset instead of the full pc.
 SANITIZER_INTERFACE_ATTRIBUTE
 uptr StackTrace::CompressStack(StackTrace *stack, u32 *compressed, uptr size) {
-#if __WORDSIZE == 32
+#if SANITIZER_WORDSIZE == 32
   // Don't compress, just copy.
   uptr res = 0;
   for (uptr i = 0; i < stack->size && i < size; i++) {
@@ -184,7 +199,7 @@
     compressed[c_index] = 0;
   if (c_index + 1 < size)
     compressed[c_index + 1] = 0;
-#endif  // __WORDSIZE
+#endif  // SANITIZER_WORDSIZE
 
   // debug-only code
 #if 0
@@ -207,7 +222,7 @@
 SANITIZER_INTERFACE_ATTRIBUTE
 void StackTrace::UncompressStack(StackTrace *stack,
                                  u32 *compressed, uptr size) {
-#if __WORDSIZE == 32
+#if SANITIZER_WORDSIZE == 32
   // Don't uncompress, just copy.
   stack->size = 0;
   for (uptr i = 0; i < size && i < kStackTraceMax; i++) {
@@ -242,7 +257,7 @@
     stack->trace[stack->size++] = pc;
     prev_pc = pc;
   }
-#endif  // __WORDSIZE
+#endif  // SANITIZER_WORDSIZE
 }
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.h b/lib/sanitizer_common/sanitizer_stacktrace.h
index b823a7e..fe2dcf0 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.h
+++ b/lib/sanitizer_common/sanitizer_stacktrace.h
@@ -45,6 +45,8 @@
 
   void FastUnwindStack(uptr pc, uptr bp, uptr stack_top, uptr stack_bottom);
 
+  void PopStackFrames(uptr count);
+
   static uptr GetCurrentPc();
 
   static uptr CompressStack(StackTrace *stack,
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.cc b/lib/sanitizer_common/sanitizer_symbolizer.cc
index 30bcd96..d52cd07 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer.cc
@@ -256,6 +256,17 @@
     // Otherwise, the data was filled by external symbolizer.
     return actual_frames;
   }
+
+  bool SymbolizeData(uptr addr, AddressInfo *frame) {
+    LoadedModule *module = FindModuleForAddress(addr);
+    if (module == 0)
+      return false;
+    const char *module_name = module->full_name();
+    uptr module_offset = addr - module->base_address();
+    frame->FillAddressAndModuleInfo(addr, module_name, module_offset);
+    return true;
+  }
+
   bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
     int input_fd, output_fd;
     if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
@@ -307,6 +318,10 @@
   return symbolizer.SymbolizeCode(address, frames, max_frames);
 }
 
+bool SymbolizeData(uptr address, AddressInfo *frame) {
+  return symbolizer.SymbolizeData(address, frame);
+}
+
 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
   return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
 }
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.h b/lib/sanitizer_common/sanitizer_symbolizer.h
index 1042c8e..196e108 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -58,6 +58,7 @@
 // of descriptions actually filled.
 // This function should NOT be called from two threads simultaneously.
 uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames);
+bool SymbolizeData(uptr address, AddressInfo *frame);
 
 // Starts external symbolizer program in a subprocess. Sanitizer communicates
 // with external symbolizer via pipes.
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_linux.cc b/lib/sanitizer_common/sanitizer_symbolizer_linux.cc
index 6125cda..4bd3dc8 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_linux.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_linux.cc
@@ -23,6 +23,7 @@
 #include <poll.h>
 #include <sys/socket.h>
 #include <sys/types.h>
+#include <sys/wait.h>
 #include <unistd.h>
 
 #if !defined(__ANDROID__) && !defined(ANDROID)
@@ -31,8 +32,15 @@
 
 namespace __sanitizer {
 
+static const int kSymbolizerStartupTimeMillis = 10;
+
 bool StartSymbolizerSubprocess(const char *path_to_symbolizer,
                                int *input_fd, int *output_fd) {
+  if (!FileExists(path_to_symbolizer)) {
+    Report("WARNING: invalid path to external symbolizer!\n");
+    return false;
+  }
+
   int *infd = NULL;
   int *outfd = NULL;
   // The client program may close its stdin and/or stdout and/or stderr
@@ -99,6 +107,17 @@
   internal_close(infd[1]);
   *input_fd = infd[0];
   *output_fd = outfd[1];
+
+  // Check that symbolizer subprocess started successfully.
+  int pid_status;
+  SleepForMillis(kSymbolizerStartupTimeMillis);
+  int exited_pid = waitpid(pid, &pid_status, WNOHANG);
+  if (exited_pid != 0) {
+    // Either waitpid failed, or child has already exited.
+    Report("WARNING: external symbolizer didn't start up correctly!\n");
+    return false;
+  }
+
   return true;
 }
 
diff --git a/lib/sanitizer_common/sanitizer_win.cc b/lib/sanitizer_common/sanitizer_win.cc
index dbacc5a..49a4e8b 100644
--- a/lib/sanitizer_common/sanitizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_win.cc
@@ -12,6 +12,9 @@
 // sanitizer_libc.h.
 //===----------------------------------------------------------------------===//
 #ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#define NOGDI
+#include <stdlib.h>
 #include <windows.h>
 
 #include "sanitizer_common.h"
@@ -20,6 +23,18 @@
 namespace __sanitizer {
 
 // --------------------- sanitizer_common.h
+uptr GetPageSize() {
+  return 1U << 14;  // FIXME: is this configurable?
+}
+
+uptr GetMmapGranularity() {
+  return 1U << 16;  // FIXME: is this configurable?
+}
+
+bool FileExists(const char *filename) {
+  UNIMPLEMENTED();
+}
+
 int GetPid() {
   return GetProcessId(GetCurrentProcess());
 }
@@ -41,7 +56,6 @@
   *stack_bottom = (uptr)mbi.AllocationBase;
 }
 
-
 void *MmapOrDie(uptr size, const char *mem_type) {
   void *rv = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
   if (rv == 0) {
@@ -61,8 +75,12 @@
 }
 
 void *MmapFixedNoReserve(uptr fixed_addr, uptr size) {
-  return VirtualAlloc((LPVOID)fixed_addr, size,
-                      MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  void *p = VirtualAlloc((LPVOID)fixed_addr, size,
+      MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  if (p == 0)
+    Report("ERROR: Failed to allocate 0x%zx (%zd) bytes at %p (%d)\n",
+           size, size, fixed_addr, GetLastError());
+  return p;
 }
 
 void *Mprotect(uptr fixed_addr, uptr size) {
@@ -111,6 +129,10 @@
   UNIMPLEMENTED();
 }
 
+void PrepareForSandboxing() {
+  // Nothing here for now.
+}
+
 bool StackSizeIsUnlimited() {
   UNIMPLEMENTED();
 }
@@ -136,9 +158,11 @@
   _exit(-1);  // abort is not NORETURN on Windows.
 }
 
+#ifndef SANITIZER_GO
 int Atexit(void (*function)(void)) {
   return atexit(function);
 }
+#endif
 
 // ------------------ sanitizer_libc.h
 void *internal_mmap(void *addr, uptr length, int prot, int flags,
@@ -154,6 +178,10 @@
   UNIMPLEMENTED();
 }
 
+int internal_isatty(fd_t fd) {
+  UNIMPLEMENTED();
+}
+
 fd_t internal_open(const char *filename, bool write) {
   UNIMPLEMENTED();
 }
@@ -163,7 +191,7 @@
 }
 
 uptr internal_write(fd_t fd, const void *buf, uptr count) {
-  if (fd != 2)
+  if (fd != kStderrFd)
     UNIMPLEMENTED();
   HANDLE err = GetStdHandle(STD_ERROR_HANDLE);
   if (err == 0)
@@ -187,7 +215,8 @@
 }
 
 int internal_sched_yield() {
-  UNIMPLEMENTED();
+  Sleep(0);
+  return 0;
 }
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/scripts/check_lint.sh b/lib/sanitizer_common/scripts/check_lint.sh
index 37997c0..ca0cafa 100755
--- a/lib/sanitizer_common/scripts/check_lint.sh
+++ b/lib/sanitizer_common/scripts/check_lint.sh
@@ -13,7 +13,7 @@
 # Cpplint setup
 cd ${SCRIPT_DIR}
 if [ ! -d cpplint ]; then
-  svn co -r82 http://google-styleguide.googlecode.com/svn/trunk/cpplint cpplint
+  svn co -r83 http://google-styleguide.googlecode.com/svn/trunk/cpplint cpplint
 fi
 CPPLINT=${SCRIPT_DIR}/cpplint/cpplint.py
 
diff --git a/lib/sanitizer_common/tests/CMakeLists.txt b/lib/sanitizer_common/tests/CMakeLists.txt
index d4debc9..3baa08b 100644
--- a/lib/sanitizer_common/tests/CMakeLists.txt
+++ b/lib/sanitizer_common/tests/CMakeLists.txt
@@ -4,12 +4,10 @@
   sanitizer_flags_test.cc
   sanitizer_libc_test.cc
   sanitizer_list_test.cc
+  sanitizer_printf_test.cc
   sanitizer_stackdepot_test.cc
   sanitizer_test_main.cc
   )
-if(CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT LLVM_BUILD_32_BITS)
-  list(APPEND SANITIZER_UNITTESTS sanitizer_allocator64_test.cc)
-endif()
 
 include_directories(..)
 include_directories(../..)
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator64_test.cc b/lib/sanitizer_common/tests/sanitizer_allocator64_test.cc
deleted file mode 100644
index bf76848..0000000
--- a/lib/sanitizer_common/tests/sanitizer_allocator64_test.cc
+++ /dev/null
@@ -1,275 +0,0 @@
-//===-- sanitizer_allocator64_test.cc -------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Tests for sanitizer_allocator64.h.
-//===----------------------------------------------------------------------===//
-#include "sanitizer_common/sanitizer_allocator64.h"
-#include "gtest/gtest.h"
-
-#include <algorithm>
-#include <vector>
-
-static const uptr kAllocatorSpace = 0x600000000000ULL;
-static const uptr kAllocatorSize = 0x10000000000;  // 1T.
-
-typedef DefaultSizeClassMap SCMap;
-typedef
-  SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 16, SCMap> Allocator;
-typedef SizeClassAllocatorLocalCache<Allocator::kNumClasses, Allocator>
-  AllocatorCache;
-
-TEST(SanitizerCommon, DefaultSizeClassMap) {
-#if 0
-  for (uptr i = 0; i < SCMap::kNumClasses; i++) {
-    printf("c%ld => %ld cached=%ld(%ld)\n",
-        i, SCMap::Size(i), SCMap::MaxCached(i) * SCMap::Size(i),
-        SCMap::MaxCached(i));
-  }
-#endif
-
-  for (uptr c = 0; c < SCMap::kNumClasses; c++) {
-    uptr s = SCMap::Size(c);
-    CHECK_EQ(SCMap::ClassID(s), c);
-    if (c != SCMap::kNumClasses - 1)
-      CHECK_EQ(SCMap::ClassID(s + 1), c + 1);
-    CHECK_EQ(SCMap::ClassID(s - 1), c);
-    if (c)
-      CHECK_GT(SCMap::Size(c), SCMap::Size(c-1));
-  }
-  CHECK_EQ(SCMap::ClassID(SCMap::kMaxSize + 1), 0);
-
-  for (uptr s = 1; s <= SCMap::kMaxSize; s++) {
-    uptr c = SCMap::ClassID(s);
-    CHECK_LT(c, SCMap::kNumClasses);
-    CHECK_GE(SCMap::Size(c), s);
-    if (c > 0)
-      CHECK_LT(SCMap::Size(c-1), s);
-  }
-}
-
-TEST(SanitizerCommon, SizeClassAllocator64) {
-  Allocator a;
-  a.Init();
-
-  static const uptr sizes[] = {1, 16, 30, 40, 100, 1000, 10000,
-    50000, 60000, 100000, 300000, 500000, 1000000, 2000000};
-
-  std::vector<void *> allocated;
-
-  uptr last_total_allocated = 0;
-  for (int i = 0; i < 5; i++) {
-    // Allocate a bunch of chunks.
-    for (uptr s = 0; s < sizeof(sizes) /sizeof(sizes[0]); s++) {
-      uptr size = sizes[s];
-      // printf("s = %ld\n", size);
-      uptr n_iter = std::max((uptr)2, 1000000 / size);
-      for (uptr i = 0; i < n_iter; i++) {
-        void *x = a.Allocate(size, 1);
-        allocated.push_back(x);
-        CHECK(a.PointerIsMine(x));
-        CHECK_GE(a.GetActuallyAllocatedSize(x), size);
-        uptr class_id = a.GetSizeClass(x);
-        CHECK_EQ(class_id, SCMap::ClassID(size));
-        uptr *metadata = reinterpret_cast<uptr*>(a.GetMetaData(x));
-        metadata[0] = reinterpret_cast<uptr>(x) + 1;
-        metadata[1] = 0xABCD;
-      }
-    }
-    // Deallocate all.
-    for (uptr i = 0; i < allocated.size(); i++) {
-      void *x = allocated[i];
-      uptr *metadata = reinterpret_cast<uptr*>(a.GetMetaData(x));
-      CHECK_EQ(metadata[0], reinterpret_cast<uptr>(x) + 1);
-      CHECK_EQ(metadata[1], 0xABCD);
-      a.Deallocate(x);
-    }
-    allocated.clear();
-    uptr total_allocated = a.TotalMemoryUsed();
-    if (last_total_allocated == 0)
-      last_total_allocated = total_allocated;
-    CHECK_EQ(last_total_allocated, total_allocated);
-  }
-
-  a.TestOnlyUnmap();
-}
-
-
-TEST(SanitizerCommon, SizeClassAllocator64MetadataStress) {
-  Allocator a;
-  a.Init();
-  static volatile void *sink;
-
-  const uptr kNumAllocs = 10000;
-  void *allocated[kNumAllocs];
-  for (uptr i = 0; i < kNumAllocs; i++) {
-    uptr size = (i % 4096) + 1;
-    void *x = a.Allocate(size, 1);
-    allocated[i] = x;
-  }
-  // Get Metadata kNumAllocs^2 times.
-  for (uptr i = 0; i < kNumAllocs * kNumAllocs; i++) {
-    sink = a.GetMetaData(allocated[i % kNumAllocs]);
-  }
-  for (uptr i = 0; i < kNumAllocs; i++) {
-    a.Deallocate(allocated[i]);
-  }
-
-  a.TestOnlyUnmap();
-  (void)sink;
-}
-
-void FailInAssertionOnOOM() {
-  Allocator a;
-  a.Init();
-  const uptr size = 1 << 20;
-  for (int i = 0; i < 1000000; i++) {
-    a.Allocate(size, 1);
-  }
-
-  a.TestOnlyUnmap();
-}
-
-TEST(SanitizerCommon, SizeClassAllocator64Overflow) {
-  EXPECT_DEATH(FailInAssertionOnOOM(),
-               "allocated_user.*allocated_meta.*kRegionSize");
-}
-
-TEST(SanitizerCommon, LargeMmapAllocator) {
-  LargeMmapAllocator a;
-  a.Init();
-
-  static const int kNumAllocs = 100;
-  void *allocated[kNumAllocs];
-  static const uptr size = 1000;
-  // Allocate some.
-  for (int i = 0; i < kNumAllocs; i++) {
-    allocated[i] = a.Allocate(size, 1);
-  }
-  // Deallocate all.
-  CHECK_GT(a.TotalMemoryUsed(), size * kNumAllocs);
-  for (int i = 0; i < kNumAllocs; i++) {
-    void *p = allocated[i];
-    CHECK(a.PointerIsMine(p));
-    a.Deallocate(p);
-  }
-  // Check that non left.
-  CHECK_EQ(a.TotalMemoryUsed(), 0);
-
-  // Allocate some more, also add metadata.
-  for (int i = 0; i < kNumAllocs; i++) {
-    void *x = a.Allocate(size, 1);
-    CHECK_GE(a.GetActuallyAllocatedSize(x), size);
-    uptr *meta = reinterpret_cast<uptr*>(a.GetMetaData(x));
-    *meta = i;
-    allocated[i] = x;
-  }
-  CHECK_GT(a.TotalMemoryUsed(), size * kNumAllocs);
-  // Deallocate all in reverse order.
-  for (int i = 0; i < kNumAllocs; i++) {
-    int idx = kNumAllocs - i - 1;
-    void *p = allocated[idx];
-    uptr *meta = reinterpret_cast<uptr*>(a.GetMetaData(p));
-    CHECK_EQ(*meta, idx);
-    CHECK(a.PointerIsMine(p));
-    a.Deallocate(p);
-  }
-  CHECK_EQ(a.TotalMemoryUsed(), 0);
-
-  for (uptr alignment = 8; alignment <= (1<<28); alignment *= 2) {
-    for (int i = 0; i < kNumAllocs; i++) {
-      uptr size = ((i % 10) + 1) * kPageSize;
-      allocated[i] = a.Allocate(size, alignment);
-      CHECK_EQ(0, (uptr)allocated[i] % alignment);
-      char *p = (char*)allocated[i];
-      p[0] = p[size - 1] = 0;
-    }
-    for (int i = 0; i < kNumAllocs; i++) {
-      a.Deallocate(allocated[i]);
-    }
-  }
-}
-
-TEST(SanitizerCommon, CombinedAllocator) {
-  typedef Allocator PrimaryAllocator;
-  typedef LargeMmapAllocator SecondaryAllocator;
-  typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
-          SecondaryAllocator> Allocator;
-
-  AllocatorCache cache;
-  Allocator a;
-  a.Init();
-  cache.Init();
-
-  EXPECT_EQ(a.Allocate(&cache, -1, 1), (void*)0);
-  EXPECT_EQ(a.Allocate(&cache, -1, 1024), (void*)0);
-  EXPECT_EQ(a.Allocate(&cache, (uptr)-1 - 1024, 1), (void*)0);
-  EXPECT_EQ(a.Allocate(&cache, (uptr)-1 - 1024, 1024), (void*)0);
-  EXPECT_EQ(a.Allocate(&cache, (uptr)-1 - 1023, 1024), (void*)0);
-
-  const uptr kNumAllocs = 100000;
-  const uptr kNumIter = 10;
-  for (uptr iter = 0; iter < kNumIter; iter++) {
-    std::vector<void*> allocated;
-    for (uptr i = 0; i < kNumAllocs; i++) {
-      uptr size = (i % (1 << 14)) + 1;
-      if ((i % 1024) == 0)
-        size = 1 << (10 + (i % 14));
-      void *x = a.Allocate(&cache, size, 1);
-      uptr *meta = reinterpret_cast<uptr*>(a.GetMetaData(x));
-      CHECK_EQ(*meta, 0);
-      *meta = size;
-      allocated.push_back(x);
-    }
-
-    random_shuffle(allocated.begin(), allocated.end());
-
-    for (uptr i = 0; i < kNumAllocs; i++) {
-      void *x = allocated[i];
-      uptr *meta = reinterpret_cast<uptr*>(a.GetMetaData(x));
-      CHECK_NE(*meta, 0);
-      CHECK(a.PointerIsMine(x));
-      *meta = 0;
-      a.Deallocate(&cache, x);
-    }
-    allocated.clear();
-    a.SwallowCache(&cache);
-  }
-  a.TestOnlyUnmap();
-}
-
-static THREADLOCAL AllocatorCache static_allocator_cache;
-
-TEST(SanitizerCommon, SizeClassAllocatorLocalCache) {
-  static_allocator_cache.Init();
-
-  Allocator a;
-  AllocatorCache cache;
-
-  a.Init();
-  cache.Init();
-
-  const uptr kNumAllocs = 10000;
-  const int kNumIter = 100;
-  uptr saved_total = 0;
-  for (int i = 0; i < kNumIter; i++) {
-    void *allocated[kNumAllocs];
-    for (uptr i = 0; i < kNumAllocs; i++) {
-      allocated[i] = cache.Allocate(&a, 0);
-    }
-    for (uptr i = 0; i < kNumAllocs; i++) {
-      cache.Deallocate(&a, 0, allocated[i]);
-    }
-    cache.Drain(&a);
-    uptr total_allocated = a.TotalMemoryUsed();
-    if (saved_total)
-      CHECK_EQ(saved_total, total_allocated);
-    saved_total = total_allocated;
-  }
-
-  a.TestOnlyUnmap();
-}
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator64_testlib.cc b/lib/sanitizer_common/tests/sanitizer_allocator64_testlib.cc
index 3552f0d..b41f808 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator64_testlib.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator64_testlib.cc
@@ -10,7 +10,7 @@
 // The primary purpose of this file is an end-to-end integration test
 // for CombinedAllocator.
 //===----------------------------------------------------------------------===//
-#include "sanitizer_common/sanitizer_allocator64.h"
+#include "sanitizer_common/sanitizer_allocator.h"
 #include <stddef.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -75,13 +75,13 @@
 
 void *valloc(size_t size) {
   assert(inited);
-  return allocator.Allocate(&cache, size, kPageSize);
+  return allocator.Allocate(&cache, size, GetPageSizeCached());
 }
 
 void *pvalloc(size_t size) {
   assert(inited);
-  if (size == 0) size = kPageSize;
-  return allocator.Allocate(&cache, size, kPageSize);
+  if (size == 0) size = GetPageSizeCached();
+  return allocator.Allocate(&cache, size, GetPageSizeCached());
 }
 }
 #endif
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
index d3445b6..e20b1ca 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
@@ -8,13 +8,368 @@
 //===----------------------------------------------------------------------===//
 //
 // This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+// Tests for sanitizer_allocator.h.
 //
 //===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_allocator.h"
 #include "sanitizer_common/sanitizer_common.h"
-#include "gtest/gtest.h"
-#include <stdlib.h>
 
-namespace __sanitizer {
+#include "gtest/gtest.h"
+
+#include <stdlib.h>
+#include <algorithm>
+#include <vector>
+
+#if SANITIZER_WORDSIZE == 64
+static const uptr kAllocatorSpace = 0x700000000000ULL;
+static const uptr kAllocatorSize  = 0x010000000000ULL;  // 1T.
+static const u64 kAddressSpaceSize = 1ULL << 47;
+
+typedef SizeClassAllocator64<
+  kAllocatorSpace, kAllocatorSize, 16, DefaultSizeClassMap> Allocator64;
+
+typedef SizeClassAllocator64<
+  kAllocatorSpace, kAllocatorSize, 16, CompactSizeClassMap> Allocator64Compact;
+#else
+static const u64 kAddressSpaceSize = 1ULL << 32;
+#endif
+
+typedef SizeClassAllocator32<
+  0, kAddressSpaceSize, 16, CompactSizeClassMap> Allocator32Compact;
+
+template <class SizeClassMap>
+void TestSizeClassMap() {
+  typedef SizeClassMap SCMap;
+#if 0
+  for (uptr i = 0; i < SCMap::kNumClasses; i++) {
+    printf("c%ld => %ld (%lx) cached=%ld(%ld)\n",
+        i, SCMap::Size(i), SCMap::Size(i), SCMap::MaxCached(i) * SCMap::Size(i),
+        SCMap::MaxCached(i));
+  }
+#endif
+  for (uptr c = 0; c < SCMap::kNumClasses; c++) {
+    uptr s = SCMap::Size(c);
+    CHECK_EQ(SCMap::ClassID(s), c);
+    if (c != SCMap::kNumClasses - 1)
+      CHECK_EQ(SCMap::ClassID(s + 1), c + 1);
+    CHECK_EQ(SCMap::ClassID(s - 1), c);
+    if (c)
+      CHECK_GT(SCMap::Size(c), SCMap::Size(c-1));
+  }
+  CHECK_EQ(SCMap::ClassID(SCMap::kMaxSize + 1), 0);
+
+  for (uptr s = 1; s <= SCMap::kMaxSize; s++) {
+    uptr c = SCMap::ClassID(s);
+    CHECK_LT(c, SCMap::kNumClasses);
+    CHECK_GE(SCMap::Size(c), s);
+    if (c > 0)
+      CHECK_LT(SCMap::Size(c-1), s);
+  }
+}
+
+TEST(SanitizerCommon, DefaultSizeClassMap) {
+  TestSizeClassMap<DefaultSizeClassMap>();
+}
+
+TEST(SanitizerCommon, CompactSizeClassMap) {
+  TestSizeClassMap<CompactSizeClassMap>();
+}
+
+template <class Allocator>
+void TestSizeClassAllocator() {
+  Allocator *a = new Allocator;
+  a->Init();
+
+  static const uptr sizes[] = {1, 16, 30, 40, 100, 1000, 10000,
+    50000, 60000, 100000, 300000, 500000, 1000000, 2000000};
+
+  std::vector<void *> allocated;
+
+  uptr last_total_allocated = 0;
+  for (int i = 0; i < 5; i++) {
+    // Allocate a bunch of chunks.
+    for (uptr s = 0; s < ARRAY_SIZE(sizes); s++) {
+      uptr size = sizes[s];
+      if (!a->CanAllocate(size, 1)) continue;
+      // printf("s = %ld\n", size);
+      uptr n_iter = std::max((uptr)2, 1000000 / size);
+      for (uptr i = 0; i < n_iter; i++) {
+        void *x = a->Allocate(size, 1);
+        allocated.push_back(x);
+        CHECK_EQ(x, a->GetBlockBegin(x));
+        CHECK_EQ(x, a->GetBlockBegin((char*)x + size - 1));
+        CHECK(a->PointerIsMine(x));
+        CHECK_GE(a->GetActuallyAllocatedSize(x), size);
+        uptr class_id = a->GetSizeClass(x);
+        CHECK_EQ(class_id, Allocator::SizeClassMapT::ClassID(size));
+        uptr *metadata = reinterpret_cast<uptr*>(a->GetMetaData(x));
+        metadata[0] = reinterpret_cast<uptr>(x) + 1;
+        metadata[1] = 0xABCD;
+      }
+    }
+    // Deallocate all.
+    for (uptr i = 0; i < allocated.size(); i++) {
+      void *x = allocated[i];
+      uptr *metadata = reinterpret_cast<uptr*>(a->GetMetaData(x));
+      CHECK_EQ(metadata[0], reinterpret_cast<uptr>(x) + 1);
+      CHECK_EQ(metadata[1], 0xABCD);
+      a->Deallocate(x);
+    }
+    allocated.clear();
+    uptr total_allocated = a->TotalMemoryUsed();
+    if (last_total_allocated == 0)
+      last_total_allocated = total_allocated;
+    CHECK_EQ(last_total_allocated, total_allocated);
+  }
+
+  a->TestOnlyUnmap();
+  delete a;
+}
+
+#if SANITIZER_WORDSIZE == 64
+TEST(SanitizerCommon, SizeClassAllocator64) {
+  TestSizeClassAllocator<Allocator64>();
+}
+
+TEST(SanitizerCommon, SizeClassAllocator64Compact) {
+  TestSizeClassAllocator<Allocator64Compact>();
+}
+#endif
+
+TEST(SanitizerCommon, SizeClassAllocator32Compact) {
+  TestSizeClassAllocator<Allocator32Compact>();
+}
+
+template <class Allocator>
+void SizeClassAllocatorMetadataStress() {
+  Allocator *a = new Allocator;
+  a->Init();
+  static volatile void *sink;
+
+  const uptr kNumAllocs = 10000;
+  void *allocated[kNumAllocs];
+  for (uptr i = 0; i < kNumAllocs; i++) {
+    uptr size = (i % 4096) + 1;
+    void *x = a->Allocate(size, 1);
+    allocated[i] = x;
+  }
+  // Get Metadata kNumAllocs^2 times.
+  for (uptr i = 0; i < kNumAllocs * kNumAllocs; i++) {
+    sink = a->GetMetaData(allocated[i % kNumAllocs]);
+  }
+  for (uptr i = 0; i < kNumAllocs; i++) {
+    a->Deallocate(allocated[i]);
+  }
+
+  a->TestOnlyUnmap();
+  (void)sink;
+  delete a;
+}
+
+#if SANITIZER_WORDSIZE == 64
+TEST(SanitizerCommon, SizeClassAllocator64MetadataStress) {
+  SizeClassAllocatorMetadataStress<Allocator64>();
+}
+
+TEST(SanitizerCommon, SizeClassAllocator64CompactMetadataStress) {
+  SizeClassAllocatorMetadataStress<Allocator64Compact>();
+}
+#endif
+TEST(SanitizerCommon, SizeClassAllocator32CompactMetadataStress) {
+  SizeClassAllocatorMetadataStress<Allocator32Compact>();
+}
+
+template<class Allocator>
+void FailInAssertionOnOOM() {
+  Allocator a;
+  a.Init();
+  const uptr size = 1 << 20;
+  for (int i = 0; i < 1000000; i++) {
+    a.Allocate(size, 1);
+  }
+
+  a.TestOnlyUnmap();
+}
+
+#if SANITIZER_WORDSIZE == 64
+TEST(SanitizerCommon, SizeClassAllocator64Overflow) {
+  EXPECT_DEATH(FailInAssertionOnOOM<Allocator64>(), "Out of memory");
+}
+#endif
+
+TEST(SanitizerCommon, LargeMmapAllocator) {
+  LargeMmapAllocator a;
+  a.Init();
+
+  static const int kNumAllocs = 100;
+  void *allocated[kNumAllocs];
+  static const uptr size = 1000;
+  // Allocate some.
+  for (int i = 0; i < kNumAllocs; i++) {
+    allocated[i] = a.Allocate(size, 1);
+  }
+  // Deallocate all.
+  CHECK_GT(a.TotalMemoryUsed(), size * kNumAllocs);
+  for (int i = 0; i < kNumAllocs; i++) {
+    void *p = allocated[i];
+    CHECK(a.PointerIsMine(p));
+    a.Deallocate(p);
+  }
+  // Check that none are left.
+  CHECK_EQ(a.TotalMemoryUsed(), 0);
+
+  // Allocate some more, also add metadata.
+  for (int i = 0; i < kNumAllocs; i++) {
+    void *x = a.Allocate(size, 1);
+    CHECK_GE(a.GetActuallyAllocatedSize(x), size);
+    uptr *meta = reinterpret_cast<uptr*>(a.GetMetaData(x));
+    *meta = i;
+    allocated[i] = x;
+  }
+  CHECK_GT(a.TotalMemoryUsed(), size * kNumAllocs);
+  // Deallocate all in reverse order.
+  for (int i = 0; i < kNumAllocs; i++) {
+    int idx = kNumAllocs - i - 1;
+    void *p = allocated[idx];
+    uptr *meta = reinterpret_cast<uptr*>(a.GetMetaData(p));
+    CHECK_EQ(*meta, idx);
+    CHECK(a.PointerIsMine(p));
+    a.Deallocate(p);
+  }
+  CHECK_EQ(a.TotalMemoryUsed(), 0);
+  uptr max_alignment = SANITIZER_WORDSIZE == 64 ? (1 << 28) : (1 << 24);
+  for (uptr alignment = 8; alignment <= max_alignment; alignment *= 2) {
+    for (int i = 0; i < kNumAllocs; i++) {
+      uptr size = ((i % 10) + 1) * 4096;
+      allocated[i] = a.Allocate(size, alignment);
+      CHECK_EQ(0, (uptr)allocated[i] % alignment);
+      char *p = (char*)allocated[i];
+      p[0] = p[size - 1] = 0;
+    }
+    for (int i = 0; i < kNumAllocs; i++) {
+      a.Deallocate(allocated[i]);
+    }
+  }
+}
+
+template
+<class PrimaryAllocator, class SecondaryAllocator, class AllocatorCache>
+void TestCombinedAllocator() {
+  typedef
+      CombinedAllocator<PrimaryAllocator, AllocatorCache, SecondaryAllocator>
+      Allocator;
+  Allocator *a = new Allocator;
+  a->Init();
+
+  AllocatorCache cache;
+  cache.Init();
+
+  EXPECT_EQ(a->Allocate(&cache, -1, 1), (void*)0);
+  EXPECT_EQ(a->Allocate(&cache, -1, 1024), (void*)0);
+  EXPECT_EQ(a->Allocate(&cache, (uptr)-1 - 1024, 1), (void*)0);
+  EXPECT_EQ(a->Allocate(&cache, (uptr)-1 - 1024, 1024), (void*)0);
+  EXPECT_EQ(a->Allocate(&cache, (uptr)-1 - 1023, 1024), (void*)0);
+
+  const uptr kNumAllocs = 100000;
+  const uptr kNumIter = 10;
+  for (uptr iter = 0; iter < kNumIter; iter++) {
+    std::vector<void*> allocated;
+    for (uptr i = 0; i < kNumAllocs; i++) {
+      uptr size = (i % (1 << 14)) + 1;
+      if ((i % 1024) == 0)
+        size = 1 << (10 + (i % 14));
+      void *x = a->Allocate(&cache, size, 1);
+      uptr *meta = reinterpret_cast<uptr*>(a->GetMetaData(x));
+      CHECK_EQ(*meta, 0);
+      *meta = size;
+      allocated.push_back(x);
+    }
+
+    random_shuffle(allocated.begin(), allocated.end());
+
+    for (uptr i = 0; i < kNumAllocs; i++) {
+      void *x = allocated[i];
+      uptr *meta = reinterpret_cast<uptr*>(a->GetMetaData(x));
+      CHECK_NE(*meta, 0);
+      CHECK(a->PointerIsMine(x));
+      *meta = 0;
+      a->Deallocate(&cache, x);
+    }
+    allocated.clear();
+    a->SwallowCache(&cache);
+  }
+  a->TestOnlyUnmap();
+}
+
+#if SANITIZER_WORDSIZE == 64
+TEST(SanitizerCommon, CombinedAllocator64) {
+  TestCombinedAllocator<Allocator64,
+      LargeMmapAllocator,
+      SizeClassAllocatorLocalCache<Allocator64> > ();
+}
+
+TEST(SanitizerCommon, CombinedAllocator64Compact) {
+  TestCombinedAllocator<Allocator64Compact,
+      LargeMmapAllocator,
+      SizeClassAllocatorLocalCache<Allocator64Compact> > ();
+}
+#endif
+
+TEST(SanitizerCommon, CombinedAllocator32Compact) {
+  TestCombinedAllocator<Allocator32Compact,
+      LargeMmapAllocator,
+      SizeClassAllocatorLocalCache<Allocator32Compact> > ();
+}
+
+template <class AllocatorCache>
+void TestSizeClassAllocatorLocalCache() {
+  static THREADLOCAL AllocatorCache static_allocator_cache;
+  static_allocator_cache.Init();
+  AllocatorCache cache;
+  typedef typename AllocatorCache::Allocator Allocator;
+  Allocator *a = new Allocator();
+
+  a->Init();
+  cache.Init();
+
+  const uptr kNumAllocs = 10000;
+  const int kNumIter = 100;
+  uptr saved_total = 0;
+  for (int i = 0; i < kNumIter; i++) {
+    void *allocated[kNumAllocs];
+    for (uptr i = 0; i < kNumAllocs; i++) {
+      allocated[i] = cache.Allocate(a, 0);
+    }
+    for (uptr i = 0; i < kNumAllocs; i++) {
+      cache.Deallocate(a, 0, allocated[i]);
+    }
+    cache.Drain(a);
+    uptr total_allocated = a->TotalMemoryUsed();
+    if (saved_total)
+      CHECK_EQ(saved_total, total_allocated);
+    saved_total = total_allocated;
+  }
+
+  a->TestOnlyUnmap();
+  delete a;
+}
+
+#if SANITIZER_WORDSIZE == 64
+TEST(SanitizerCommon, SizeClassAllocator64LocalCache) {
+  TestSizeClassAllocatorLocalCache<
+      SizeClassAllocatorLocalCache<Allocator64> >();
+}
+
+TEST(SanitizerCommon, SizeClassAllocator64CompactLocalCache) {
+  TestSizeClassAllocatorLocalCache<
+      SizeClassAllocatorLocalCache<Allocator64Compact> >();
+}
+#endif
+
+TEST(SanitizerCommon, SizeClassAllocator32CompactLocalCache) {
+  TestSizeClassAllocatorLocalCache<
+      SizeClassAllocatorLocalCache<Allocator32Compact> >();
+}
 
 TEST(Allocator, Basic) {
   char *p = (char*)InternalAlloc(10);
@@ -22,14 +377,6 @@
   char *p2 = (char*)InternalAlloc(20);
   EXPECT_NE(p2, (char*)0);
   EXPECT_NE(p2, p);
-  for (int i = 0; i < 10; i++) {
-    p[i] = 42;
-    EXPECT_EQ(p, InternalAllocBlock(p + i));
-  }
-  for (int i = 0; i < 20; i++) {
-    ((char*)p2)[i] = 42;
-    EXPECT_EQ(p2, InternalAllocBlock(p2 + i));
-  }
   InternalFree(p);
   InternalFree(p2);
 }
@@ -42,10 +389,6 @@
     uptr sz = rand_r(&rnd) % 1000;
     char *p = (char*)InternalAlloc(sz);
     EXPECT_NE(p, (char*)0);
-    for (uptr j = 0; j < sz; j++) {
-      p[j] = 42;
-      EXPECT_EQ(p, InternalAllocBlock(p + j));
-    }
     ptrs[i] = p;
   }
   for (int i = 0; i < kCount; i++) {
@@ -61,10 +404,8 @@
   }
   InternalScopedBuffer<char> char_buf(kSize);
   EXPECT_EQ(sizeof(char) * kSize, char_buf.size());  // NOLINT
-  memset(char_buf.data(), 'c', kSize);
+  internal_memset(char_buf.data(), 'c', kSize);
   for (int i = 0; i < kSize; i++) {
     EXPECT_EQ('c', char_buf[i]);
   }
 }
-
-}  // namespace __sanitizer
diff --git a/lib/sanitizer_common/tests/sanitizer_common_test.cc b/lib/sanitizer_common/tests/sanitizer_common_test.cc
index 91570dc..dfb8884 100644
--- a/lib/sanitizer_common/tests/sanitizer_common_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_common_test.cc
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
 #include "gtest/gtest.h"
 
 namespace __sanitizer {
@@ -63,4 +64,36 @@
   EXPECT_TRUE(IsSorted(array, 2));
 }
 
+TEST(SanitizerCommon, MmapAlignedOrDie) {
+  uptr PageSize = GetPageSizeCached();
+  for (uptr size = 1; size <= 32; size *= 2) {
+    for (uptr alignment = 1; alignment <= 32; alignment *= 2) {
+      for (int iter = 0; iter < 100; iter++) {
+        uptr res = (uptr)MmapAlignedOrDie(
+            size * PageSize, alignment * PageSize, "MmapAlignedOrDieTest");
+        EXPECT_EQ(0U, res % (alignment * PageSize));
+        memset((void*)res, 1, size * PageSize);
+        UnmapOrDie((void*)res, size * PageSize);
+      }
+    }
+  }
+}
+
+#ifdef __linux__
+TEST(SanitizerCommon, SanitizerSetThreadName) {
+  const char *names[] = {
+    "0123456789012",
+    "01234567890123",
+    "012345678901234",  // Larger names will be truncated on linux.
+  };
+
+  for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
+    EXPECT_TRUE(SanitizerSetThreadName(names[i]));
+    char buff[100];
+    EXPECT_TRUE(SanitizerGetThreadName(buff, sizeof(buff) - 1));
+    EXPECT_EQ(0, internal_strcmp(buff, names[i]));
+  }
+}
+#endif
+
 }  // namespace sanitizer
diff --git a/lib/tsan/tests/unit/tsan_printf_test.cc b/lib/sanitizer_common/tests/sanitizer_printf_test.cc
similarity index 72%
rename from lib/tsan/tests/unit/tsan_printf_test.cc
rename to lib/sanitizer_common/tests/sanitizer_printf_test.cc
index 0dfd1d2..b1889cd 100644
--- a/lib/tsan/tests/unit/tsan_printf_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_printf_test.cc
@@ -1,4 +1,4 @@
-//===-- tsan_printf_test.cc -----------------------------------------------===//
+//===-- sanitizer_printf_test.cc ------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,16 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file is a part of ThreadSanitizer (TSan), a race detector.
+// Tests for sanitizer_printf.cc
 //
 //===----------------------------------------------------------------------===//
-#include "tsan_rtl.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
 #include "gtest/gtest.h"
 
 #include <string.h>
 #include <limits.h>
 
-namespace __tsan {
+namespace __sanitizer {
 
 TEST(Printf, Basic) {
   char buf[1024];
@@ -27,15 +28,21 @@
       (unsigned)10, (unsigned long)11, // NOLINT
       (void*)0x123, "_string_");
   EXPECT_EQ(len, strlen(buf));
-  EXPECT_EQ(0, strcmp(buf, "a-1b-2c4294967292e5fahbq"
-                           "0x000000000123e_string_r"));
+  void *ptr;
+  if (sizeof(ptr) == 4) {
+    EXPECT_STREQ("a-1b-2c4294967292e5fahbq"
+                 "0x00000123e_string_r", buf);
+  } else {
+    EXPECT_STREQ("a-1b-2c4294967292e5fahbq"
+                 "0x000000000123e_string_r", buf);
+  }
 }
 
 TEST(Printf, OverflowStr) {
   char buf[] = "123456789";
   uptr len = internal_snprintf(buf, 4, "%s", "abcdef");  // NOLINT
   EXPECT_EQ(len, (uptr)6);
-  EXPECT_EQ(0, strcmp(buf, "abc"));
+  EXPECT_STREQ("abc", buf);
   EXPECT_EQ(buf[3], 0);
   EXPECT_EQ(buf[4], '5');
   EXPECT_EQ(buf[5], '6');
@@ -48,7 +55,7 @@
 TEST(Printf, OverflowInt) {
   char buf[] = "123456789";
   internal_snprintf(buf, 4, "%d", -123456789);  // NOLINT
-  EXPECT_EQ(0, strcmp(buf, "-12"));
+  EXPECT_STREQ("-12", buf);
   EXPECT_EQ(buf[3], 0);
   EXPECT_EQ(buf[4], '5');
   EXPECT_EQ(buf[5], '6');
@@ -60,8 +67,14 @@
 
 TEST(Printf, OverflowUint) {
   char buf[] = "123456789";
-  internal_snprintf(buf, 4, "a%zx", (unsigned long)0x123456789);  // NOLINT
-  EXPECT_EQ(0, strcmp(buf, "a12"));
+  uptr val;
+  if (sizeof(val) == 4) {
+    val = (uptr)0x12345678;
+  } else {
+    val = (uptr)0x123456789ULL;
+  }
+  internal_snprintf(buf, 4, "a%zx", val);  // NOLINT
+  EXPECT_STREQ("a12", buf);
   EXPECT_EQ(buf[3], 0);
   EXPECT_EQ(buf[4], '5');
   EXPECT_EQ(buf[5], '6');
@@ -73,8 +86,14 @@
 
 TEST(Printf, OverflowPtr) {
   char buf[] = "123456789";
-  internal_snprintf(buf, 4, "%p", (void*)0x123456789);  // NOLINT
-  EXPECT_EQ(0, strcmp(buf, "0x0"));
+  void *p;
+  if (sizeof(p) == 4) {
+    p = (void*)0x1234567;
+  } else {
+    p = (void*)0x123456789ULL;
+  }
+  internal_snprintf(buf, 4, "%p", p);  // NOLINT
+  EXPECT_STREQ("0x0", buf);
   EXPECT_EQ(buf[3], 0);
   EXPECT_EQ(buf[4], '5');
   EXPECT_EQ(buf[5], '6');
@@ -91,7 +110,7 @@
   char buf2[1024];
   snprintf(buf2, sizeof(buf2), fmt, min, max);
   EXPECT_EQ(len, strlen(buf));
-  EXPECT_EQ(0, strcmp(buf, buf2));
+  EXPECT_STREQ(buf2, buf);
 }
 
 TEST(Printf, MinMax) {
@@ -103,4 +122,4 @@
   TestMinMax<unsigned long>("%zx-%zx", 0, ULONG_MAX);  // NOLINT
 }
 
-}  // namespace __tsan
+}  // namespace __sanitizer
diff --git a/lib/tsan/go/buildgo.sh b/lib/tsan/go/buildgo.sh
index 2351de7..dc4a238 100755
--- a/lib/tsan/go/buildgo.sh
+++ b/lib/tsan/go/buildgo.sh
@@ -1,16 +1,6 @@
 #!/bin/bash
 set -e
-
-if [ "`uname -a | grep Linux`" != "" ]; then
-	LINUX=1
-	SUFFIX="linux_amd64"
-elif [ "`uname -a | grep Darwin`" != "" ]; then
-	MAC=1
-	SUFFIX="darwin_amd64"
-else
-	echo Unknown platform
-	exit 1
-fi
+set -x
 
 SRCS="
 	tsan_go.cc
@@ -18,7 +8,6 @@
 	../rtl/tsan_flags.cc
 	../rtl/tsan_md5.cc
 	../rtl/tsan_mutex.cc
-	../rtl/tsan_printf.cc
 	../rtl/tsan_report.cc
 	../rtl/tsan_rtl.cc
 	../rtl/tsan_rtl_mutex.cc
@@ -31,47 +20,59 @@
 	../../sanitizer_common/sanitizer_common.cc
 	../../sanitizer_common/sanitizer_flags.cc
 	../../sanitizer_common/sanitizer_libc.cc
-	../../sanitizer_common/sanitizer_posix.cc
 	../../sanitizer_common/sanitizer_printf.cc
 "
 
-if [ "$LINUX" != "" ]; then
+if [ "`uname -a | grep Linux`" != "" ]; then
+	SUFFIX="linux_amd64"
+	OSCFLAGS="-fPIC -ffreestanding"
+	OSLDFLAGS="-lpthread -fPIC -fpie"
 	SRCS+="
 		../rtl/tsan_platform_linux.cc
+		../../sanitizer_common/sanitizer_posix.cc
 		../../sanitizer_common/sanitizer_linux.cc
 	"
-elif [ "$MAC" != "" ]; then
+elif [ "`uname -a | grep Darwin`" != "" ]; then
+	SUFFIX="darwin_amd64"
+	OSCFLAGS="-fPIC"
+	OSLDFLAGS="-lpthread -fPIC -fpie"
 	SRCS+="
 		../rtl/tsan_platform_mac.cc
+		../../sanitizer_common/sanitizer_posix.cc
 		../../sanitizer_common/sanitizer_mac.cc
 	"
+elif [ "`uname -a | grep MINGW`" != "" ]; then
+	SUFFIX="windows_amd64"
+	OSCFLAGS="-Wno-error=attributes -Wno-attributes"
+	OSLDFLAGS=""
+	SRCS+="
+		../rtl/tsan_platform_windows.cc
+		../../sanitizer_common/sanitizer_win.cc
+	"
+else
+	echo Unknown platform
+	exit 1
 fi
 
 SRCS+=$ADD_SRCS
-#ASMS="../rtl/tsan_rtl_amd64.S"
 
 rm -f gotsan.cc
 for F in $SRCS; do
 	cat $F >> gotsan.cc
 done
 
-FLAGS=" -I../rtl -I../.. -I../../sanitizer_common -I../../../include -fPIC -Wall -Werror -fno-exceptions -DTSAN_GO -DSANITIZER_GO -DTSAN_SHADOW_COUNT=4"
+FLAGS=" -I../rtl -I../.. -I../../sanitizer_common -I../../../include -m64 -Wall -Werror -fno-exceptions -DTSAN_GO -DSANITIZER_GO -DTSAN_SHADOW_COUNT=4 $OSCFLAGS"
 if [ "$DEBUG" == "" ]; then
 	FLAGS+=" -DTSAN_DEBUG=0 -O3 -fomit-frame-pointer"
 else
 	FLAGS+=" -DTSAN_DEBUG=1 -g"
 fi
 
-if [ "$LINUX" != "" ]; then
-	FLAGS+=" -ffreestanding"
-fi
-
 echo gcc gotsan.cc -S -o tmp.s $FLAGS $CFLAGS
 gcc gotsan.cc -S -o tmp.s $FLAGS $CFLAGS
 cat tmp.s $ASMS > gotsan.s
 echo as gotsan.s -o race_$SUFFIX.syso
 as gotsan.s -o race_$SUFFIX.syso
 
-gcc test.c race_$SUFFIX.syso -lpthread -o test
-TSAN_OPTIONS="exitcode=0" ./test
-
+gcc test.c race_$SUFFIX.syso -m64 -o test $OSLDFLAGS
+GORACE="exitcode=0 atexit_sleep_ms=0" ./test
diff --git a/lib/tsan/go/test.c b/lib/tsan/go/test.c
index a9a5b3d..2414a1e 100644
--- a/lib/tsan/go/test.c
+++ b/lib/tsan/go/test.c
@@ -15,6 +15,7 @@
 
 void __tsan_init();
 void __tsan_fini();
+void __tsan_map_shadow(void *addr, unsigned long size);
 void __tsan_go_start(int pgoid, int chgoid, void *pc);
 void __tsan_go_end(int goid);
 void __tsan_read(int goid, void *addr, void *pc);
@@ -35,6 +36,7 @@
 
 int main(void) {
   __tsan_init();
+  __tsan_map_shadow(buf, sizeof(buf) + 4096);
   __tsan_func_enter(0, &main);
   __tsan_malloc(0, buf, 10, 0);
   __tsan_release(0, buf);
diff --git a/lib/tsan/go/tsan_go.cc b/lib/tsan/go/tsan_go.cc
index eca6cae..cfbe257 100644
--- a/lib/tsan/go/tsan_go.cc
+++ b/lib/tsan/go/tsan_go.cc
@@ -108,6 +108,10 @@
   exit(res);
 }
 
+void __tsan_map_shadow(uptr addr, uptr size) {
+  MapShadow(addr, size);
+}
+
 void __tsan_read(int goid, void *addr, void *pc) {
   ThreadState *thr = goroutines[goid];
   MemoryAccess(thr, (uptr)pc, (uptr)addr, 0, false);
@@ -118,6 +122,18 @@
   MemoryAccess(thr, (uptr)pc, (uptr)addr, 0, true);
 }
 
+void __tsan_read_range(int goid, void *addr, uptr size, uptr step, void *pc) {
+  ThreadState *thr = goroutines[goid];
+  for (uptr i = 0; i < size; i += step)
+	  MemoryAccess(thr, (uptr)pc, (uptr)addr + i, 0, false);
+}
+
+void __tsan_write_range(int goid, void *addr, uptr size, uptr step, void *pc) {
+  ThreadState *thr = goroutines[goid];
+  for (uptr i = 0; i < size; i += step)
+	  MemoryAccess(thr, (uptr)pc, (uptr)addr + i, 0, true);
+}
+
 void __tsan_func_enter(int goid, void *pc) {
   ThreadState *thr = goroutines[goid];
   FuncEntry(thr, (uptr)pc);
@@ -133,8 +149,7 @@
   if (thr == 0)  // probably before __tsan_init()
     return;
   thr->in_rtl++;
-  MemoryResetRange(thr, (uptr)pc, (uptr)p, sz);
-  MemoryAccessRange(thr, (uptr)pc, (uptr)p, sz, true);
+  MemoryRangeImitateWrite(thr, (uptr)pc, (uptr)p, sz);
   thr->in_rtl--;
 }
 
@@ -188,8 +203,42 @@
 
 void __tsan_finalizer_goroutine(int goid) {
   ThreadState *thr = goroutines[goid];
-  ThreadFinalizerGoroutine(thr);
+  AcquireGlobal(thr, 0);
 }
 
+#ifdef _WIN32
+// MinGW gcc emits calls to this function; this stub provides the symbol.
+void ___chkstk_ms(void) {
+// Stub: the body is empty. A real implementation must be along the lines of:
+// .code64
+// PUBLIC ___chkstk_ms
+//     //cfi_startproc()
+// ___chkstk_ms:
+//     push rcx
+//     //cfi_push(%rcx)
+//     push rax
+//     //cfi_push(%rax)
+//     cmp rax, PAGE_SIZE
+//     lea rcx, [rsp + 24]
+//     jb l_LessThanAPage
+// .l_MoreThanAPage:
+//     sub rcx, PAGE_SIZE
+//     or rcx, 0
+//     sub rax, PAGE_SIZE
+//     cmp rax, PAGE_SIZE
+//     ja l_MoreThanAPage
+// .l_LessThanAPage:
+//     sub rcx, rax
+//     or [rcx], 0
+//     pop rax
+//     //cfi_pop(%rax)
+//     pop rcx
+//     //cfi_pop(%rcx)
+//     ret
+//     //cfi_endproc()
+// END
+}
+#endif
+
 }  // extern "C"
 }  // namespace __tsan
diff --git a/lib/tsan/lit_tests/CMakeLists.txt b/lib/tsan/lit_tests/CMakeLists.txt
index 1958ebf..6dc90e2 100644
--- a/lib/tsan/lit_tests/CMakeLists.txt
+++ b/lib/tsan/lit_tests/CMakeLists.txt
@@ -12,7 +12,7 @@
   # Run TSan output tests only if we're not cross-compiling,
   # and can be sure that clang would produce working binaries.
   set(TSAN_TEST_DEPS
-    clang clang-headers FileCheck count not
+    clang clang-headers FileCheck count not llvm-symbolizer
     ${TSAN_RUNTIME_LIBRARIES}
     )
   set(TSAN_TEST_PARAMS
@@ -31,6 +31,6 @@
   # Otherwise run only TSan unit tests.
   add_lit_testsuite(check-tsan "Running ThreadSanitizer tests"
     ${CMAKE_CURRENT_BINARY_DIR}/Unit
-    DEPENDS TsanUnitTests)
+    DEPENDS TsanUnitTests llvm-symbolizer)
   set_target_properties(check-tsan PROPERTIES FOLDER "TSan unittests")
 endif()
diff --git a/lib/tsan/lit_tests/Unit/lit.cfg b/lib/tsan/lit_tests/Unit/lit.cfg
index 2fba576..6688697 100644
--- a/lib/tsan/lit_tests/Unit/lit.cfg
+++ b/lib/tsan/lit_tests/Unit/lit.cfg
@@ -27,3 +27,11 @@
                                      "compiler-rt", "lib",
                                      "tsan", "tests")
 config.test_source_root = config.test_exec_root
+
+# Get path to external LLVM symbolizer to run ThreadSanitizer unit tests.
+llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
+if llvm_tools_dir:
+  llvm_symbolizer_path = os.path.join(llvm_tools_dir, "llvm-symbolizer")
+  config.environment['TSAN_OPTIONS'] = ("external_symbolizer_path=" +
+                                        llvm_symbolizer_path)
+
diff --git a/lib/tsan/lit_tests/Unit/lit.site.cfg.in b/lib/tsan/lit_tests/Unit/lit.site.cfg.in
index 38c24bc..23654b9 100644
--- a/lib/tsan/lit_tests/Unit/lit.site.cfg.in
+++ b/lib/tsan/lit_tests/Unit/lit.site.cfg.in
@@ -4,6 +4,15 @@
 config.build_type = "@CMAKE_BUILD_TYPE@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
+config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
+
+# The LLVM tools dir may contain %(name)s placeholders that are filled in from
+# lit parameters, so try to apply the substitution and report a missing one.
+try:
+  config.llvm_tools_dir = config.llvm_tools_dir % lit.params
+except KeyError as e:
+  key, = e.args
+  lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key, key))
 
 # Let the main config do the real work.
 lit.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/Unit/lit.cfg")
diff --git a/lib/tsan/lit_tests/fd_close_norace.cc b/lib/tsan/lit_tests/fd_close_norace.cc
new file mode 100644
index 0000000..c000de4
--- /dev/null
+++ b/lib/tsan/lit_tests/fd_close_norace.cc
@@ -0,0 +1,32 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+void *Thread1(void *x) {
+  int f = open("/dev/random", O_RDONLY);
+  close(f);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  sleep(1);
+  int f = open("/dev/random", O_RDONLY);
+  close(f);
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+
+
diff --git a/lib/tsan/lit_tests/fd_pipe_norace.cc b/lib/tsan/lit_tests/fd_pipe_norace.cc
new file mode 100644
index 0000000..1be1a34
--- /dev/null
+++ b/lib/tsan/lit_tests/fd_pipe_norace.cc
@@ -0,0 +1,32 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int fds[2];
+int X;
+
+void *Thread1(void *x) {
+  X = 42;
+  write(fds[1], "a", 1);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  char buf;
+  while (read(fds[0], &buf, 1) != 1) {
+  }
+  X = 43;
+  return NULL;
+}
+
+int main() {
+  pipe(fds);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/fd_pipe_race.cc b/lib/tsan/lit_tests/fd_pipe_race.cc
new file mode 100644
index 0000000..dfdb779
--- /dev/null
+++ b/lib/tsan/lit_tests/fd_pipe_race.cc
@@ -0,0 +1,37 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int fds[2];
+
+void *Thread1(void *x) {
+  write(fds[1], "a", 1);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  sleep(1);
+  close(fds[0]);
+  close(fds[1]);
+  return NULL;
+}
+
+int main() {
+  pipe(fds);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 8
+// CHECK:     #0 close
+// CHECK:     #1 Thread2
+// CHECK:   Previous read of size 8
+// CHECK:     #0 write
+// CHECK:     #1 Thread1
+
+
diff --git a/lib/tsan/lit_tests/fd_socket_norace.cc b/lib/tsan/lit_tests/fd_socket_norace.cc
new file mode 100644
index 0000000..3a128f8
--- /dev/null
+++ b/lib/tsan/lit_tests/fd_socket_norace.cc
@@ -0,0 +1,51 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+struct sockaddr_in addr;
+int X;
+
+void *ClientThread(void *x) {
+  X = 42;
+  int c = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+  if (connect(c, (struct sockaddr*)&addr, sizeof(addr))) {
+    perror("connect");
+    exit(1);
+  }
+  if (send(c, "a", 1, 0) != 1) {
+    perror("send");
+    exit(1);
+  }
+  close(c);
+  return NULL;
+}
+
+int main() {
+  int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+  addr.sin_family = AF_INET;
+  inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);
+  addr.sin_port = 0;  // Port 0: let the kernel pick a free port.
+  socklen_t len = sizeof(addr);
+  bind(s, (sockaddr*)&addr, len);
+  getsockname(s, (sockaddr*)&addr, &len);  // Fetch the port actually bound.
+  listen(s, 10);
+  pthread_t t;
+  pthread_create(&t, 0, ClientThread, 0);
+  int c = accept(s, 0, 0);
+  char buf;
+  while (read(c, &buf, 1) != 1) {
+  }
+  X = 43;  // Written after the read returned the byte sent by ClientThread.
+  close(c);
+  close(s);
+  pthread_join(t, 0);
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+
diff --git a/lib/tsan/lit_tests/fd_stdout_race.cc b/lib/tsan/lit_tests/fd_stdout_race.cc
new file mode 100644
index 0000000..6581fc5
--- /dev/null
+++ b/lib/tsan/lit_tests/fd_stdout_race.cc
@@ -0,0 +1,41 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int X;
+
+void *Thread1(void *x) {
+  sleep(1);
+  int f = open("/dev/random", O_RDONLY);
+  char buf;
+  read(f, &buf, 1);
+  close(f);
+  X = 42;
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  X = 43;
+  write(STDOUT_FILENO, "a", 1);
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 4
+// CHECK:     #0 Thread1
+// CHECK:   Previous write of size 4
+// CHECK:     #0 Thread2
+
+
diff --git a/lib/tsan/lit_tests/free_race.c b/lib/tsan/lit_tests/free_race.c
index 9200c3b..7a2ec0c 100644
--- a/lib/tsan/lit_tests/free_race.c
+++ b/lib/tsan/lit_tests/free_race.c
@@ -16,7 +16,7 @@
 }
 
 void *Thread2(void *x) {
-  usleep(1000000);
+  sleep(1);
   pthread_mutex_lock(&mtx);
   mem[0] = 42;
   pthread_mutex_unlock(&mtx);
@@ -35,9 +35,9 @@
 }
 
 // CHECK: WARNING: ThreadSanitizer: heap-use-after-free
-// CHECK:   Write of size 4 at {{.*}} by main thread:
+// CHECK:   Write of size 4 at {{.*}} by main thread{{.*}}:
 // CHECK:     #0 Thread2
 // CHECK:     #1 main
-// CHECK:   Previous write of size 8 at {{.*}} by thread 1:
+// CHECK:   Previous write of size 8 at {{.*}} by thread T1{{.*}}:
 // CHECK:     #0 free
 // CHECK:     #1 Thread1
diff --git a/lib/tsan/lit_tests/ignore_race.cc b/lib/tsan/lit_tests/ignore_race.cc
new file mode 100644
index 0000000..7a60ca1
--- /dev/null
+++ b/lib/tsan/lit_tests/ignore_race.cc
@@ -0,0 +1,33 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+// Checks that a write made between AnnotateIgnoreWritesBegin/End is not
+// reported as racing with an unannotated write in the main thread.
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+
+extern "C" void AnnotateIgnoreWritesBegin(const char *f, int l);
+extern "C" void AnnotateIgnoreWritesEnd(const char *f, int l);
+extern "C" void AnnotateIgnoreReadsBegin(const char *f, int l);
+extern "C" void AnnotateIgnoreReadsEnd(const char *f, int l);
+
+void *Thread(void *x) {
+  AnnotateIgnoreWritesBegin(__FILE__, __LINE__);
+  AnnotateIgnoreReadsBegin(__FILE__, __LINE__);
+  Global = 42;
+  AnnotateIgnoreReadsEnd(__FILE__, __LINE__);
+  AnnotateIgnoreWritesEnd(__FILE__, __LINE__);
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  sleep(1);
+  Global = 43;
+  pthread_join(t, 0);
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/lit_tests/lit.cfg b/lib/tsan/lit_tests/lit.cfg
index acfc744..7e2db7b 100644
--- a/lib/tsan/lit_tests/lit.cfg
+++ b/lib/tsan/lit_tests/lit.cfg
@@ -57,11 +57,18 @@
 lit.load_config(config, compiler_rt_lit_cfg)
 
 # Setup environment variables for running ThreadSanitizer.
-config.environment['TSAN_OPTIONS'] = "atexit_sleep_ms=0"
+tsan_options = "atexit_sleep_ms=0"
+# Get path to external LLVM symbolizer to run ThreadSanitizer output tests.
+llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
+if llvm_tools_dir:
+  llvm_symbolizer_path = os.path.join(llvm_tools_dir, "llvm-symbolizer")
+  tsan_options += " " + "external_symbolizer_path=" + llvm_symbolizer_path
 
-# Setup default compiler flags used with -faddress-sanitizer option.
+config.environment['TSAN_OPTIONS'] = tsan_options
+
+# Setup default compiler flags used with -fsanitize=thread option.
 # FIXME: Review the set of required flags and check if it can be reduced.
-clang_tsan_cflags = ("-fthread-sanitizer "
+clang_tsan_cflags = ("-fsanitize=thread "
                       + "-fPIE "
                       + "-fno-builtin "
                       + "-g "
diff --git a/lib/tsan/lit_tests/memcpy_race.cc b/lib/tsan/lit_tests/memcpy_race.cc
index c87bc9c..806740d 100644
--- a/lib/tsan/lit_tests/memcpy_race.cc
+++ b/lib/tsan/lit_tests/memcpy_race.cc
@@ -15,7 +15,7 @@
 }
 
 void *Thread2(void *x) {
-  usleep(500*1000);
+  sleep(1);
   memcpy(data+3, data2, 4);
   return NULL;
 }
@@ -32,9 +32,9 @@
 
 // CHECK: addr=[[ADDR:0x[0-9,a-f]+]]
 // CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 1 at [[ADDR]] by thread 2:
+// CHECK:   Write of size 1 at [[ADDR]] by thread T2:
 // CHECK:     #0 memcpy
 // CHECK:     #1 Thread2
-// CHECK:   Previous write of size 1 at [[ADDR]] by thread 1:
+// CHECK:   Previous write of size 1 at [[ADDR]] by thread T1:
 // CHECK:     #0 memcpy
 // CHECK:     #1 Thread1
diff --git a/lib/tsan/lit_tests/mop_with_offset.cc b/lib/tsan/lit_tests/mop_with_offset.cc
index 14ece1a..0c11ef6 100644
--- a/lib/tsan/lit_tests/mop_with_offset.cc
+++ b/lib/tsan/lit_tests/mop_with_offset.cc
@@ -11,7 +11,7 @@
 }
 
 void *Thread2(void *x) {
-  usleep(500*1000);
+  sleep(1);
   char *p = (char*)x;
   p[2] = 1;
   return NULL;
@@ -32,5 +32,5 @@
 // CHECK: ptr1=[[PTR1:0x[0-9,a-f]+]]
 // CHECK: ptr2=[[PTR2:0x[0-9,a-f]+]]
 // CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 1 at [[PTR2]] by thread 2:
-// CHECK:   Previous write of size 4 at [[PTR1]] by thread 1:
+// CHECK:   Write of size 1 at [[PTR2]] by thread T2:
+// CHECK:   Previous write of size 4 at [[PTR1]] by thread T1:
diff --git a/lib/tsan/lit_tests/mop_with_offset2.cc b/lib/tsan/lit_tests/mop_with_offset2.cc
index 2a6fde7..ee0d64a 100644
--- a/lib/tsan/lit_tests/mop_with_offset2.cc
+++ b/lib/tsan/lit_tests/mop_with_offset2.cc
@@ -5,7 +5,7 @@
 #include <unistd.h>
 
 void *Thread1(void *x) {
-  usleep(500*1000);
+  sleep(1);
   int *p = (int*)x;
   p[0] = 1;
   return NULL;
@@ -32,5 +32,5 @@
 // CHECK: ptr1=[[PTR1:0x[0-9,a-f]+]]
 // CHECK: ptr2=[[PTR2:0x[0-9,a-f]+]]
 // CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 4 at [[PTR1]] by thread 1:
-// CHECK:   Previous write of size 1 at [[PTR2]] by thread 2:
+// CHECK:   Write of size 4 at [[PTR1]] by thread T1:
+// CHECK:   Previous write of size 1 at [[PTR2]] by thread T2:
diff --git a/lib/tsan/lit_tests/mutex_destroy_locked.cc b/lib/tsan/lit_tests/mutex_destroy_locked.cc
index 427c643..8523f55 100644
--- a/lib/tsan/lit_tests/mutex_destroy_locked.cc
+++ b/lib/tsan/lit_tests/mutex_destroy_locked.cc
@@ -12,7 +12,7 @@
   pthread_mutex_init(&m, 0);
   pthread_t t;
   pthread_create(&t, 0, Thread, &m);
-  usleep(1000*1000);
+  sleep(1);
   pthread_mutex_destroy(&m);
   pthread_join(t, 0);
   return 0;
diff --git a/lib/tsan/lit_tests/mutexset1.cc b/lib/tsan/lit_tests/mutexset1.cc
new file mode 100644
index 0000000..e27e056
--- /dev/null
+++ b/lib/tsan/lit_tests/mutexset1.cc
@@ -0,0 +1,39 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+pthread_mutex_t mtx;
+
+void *Thread1(void *x) {
+  sleep(1);
+  pthread_mutex_lock(&mtx);
+  Global++;
+  pthread_mutex_unlock(&mtx);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  Global--;
+  return NULL;
+}
+
+int main() {
+  pthread_mutex_init(&mtx, 0);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  pthread_mutex_destroy(&mtx);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 4 at {{.*}} by thread T1
+// CHECK:                         (mutexes: write [[M1:M[0-9]+]]):
+// CHECK:   Previous write of size 4 at {{.*}} by thread T2:
+// CHECK:   Mutex [[M1]] created at:
+// CHECK:     #0 pthread_mutex_init
+// CHECK:     #1 main {{.*}}/mutexset1.cc:23
+
diff --git a/lib/tsan/lit_tests/mutexset2.cc b/lib/tsan/lit_tests/mutexset2.cc
new file mode 100644
index 0000000..8c85043
--- /dev/null
+++ b/lib/tsan/lit_tests/mutexset2.cc
@@ -0,0 +1,39 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+pthread_mutex_t mtx;
+
+void *Thread1(void *x) {
+  pthread_mutex_lock(&mtx);
+  Global++;
+  pthread_mutex_unlock(&mtx);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  sleep(1);
+  Global--;
+  return NULL;
+}
+
+int main() {
+  pthread_mutex_init(&mtx, 0);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  pthread_mutex_destroy(&mtx);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 4 at {{.*}} by thread T2:
+// CHECK:   Previous write of size 4 at {{.*}} by thread T1
+// CHECK:                     (mutexes: write [[M1:M[0-9]+]]):
+// CHECK:   Mutex [[M1]] created at:
+// CHECK:     #0 pthread_mutex_init
+// CHECK:     #1 main {{.*}}/mutexset2.cc:23
+
diff --git a/lib/tsan/lit_tests/mutexset3.cc b/lib/tsan/lit_tests/mutexset3.cc
new file mode 100644
index 0000000..63123f8
--- /dev/null
+++ b/lib/tsan/lit_tests/mutexset3.cc
@@ -0,0 +1,47 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+pthread_mutex_t mtx1;
+pthread_mutex_t mtx2;
+
+void *Thread1(void *x) {
+  sleep(1);
+  pthread_mutex_lock(&mtx1);
+  pthread_mutex_lock(&mtx2);
+  Global++;
+  pthread_mutex_unlock(&mtx2);
+  pthread_mutex_unlock(&mtx1);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  Global--;
+  return NULL;
+}
+
+int main() {
+  pthread_mutex_init(&mtx1, 0);
+  pthread_mutex_init(&mtx2, 0);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  pthread_mutex_destroy(&mtx1);
+  pthread_mutex_destroy(&mtx2);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: Write of size 4 at {{.*}} by thread T1
+// CHECK:               (mutexes: write [[M1:M[0-9]+]], write [[M2:M[0-9]+]]):
+// CHECK:   Previous write of size 4 at {{.*}} by thread T2:
+// CHECK:   Mutex [[M1]] created at:
+// CHECK:     #0 pthread_mutex_init
+// CHECK:     #1 main {{.*}}/mutexset3.cc:26
+// CHECK:   Mutex [[M2]] created at:
+// CHECK:     #0 pthread_mutex_init
+// CHECK:     #1 main {{.*}}/mutexset3.cc:27
+
diff --git a/lib/tsan/lit_tests/mutexset4.cc b/lib/tsan/lit_tests/mutexset4.cc
new file mode 100644
index 0000000..68ed475
--- /dev/null
+++ b/lib/tsan/lit_tests/mutexset4.cc
@@ -0,0 +1,47 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+pthread_mutex_t mtx1;
+pthread_mutex_t mtx2;
+
+void *Thread1(void *x) {
+  pthread_mutex_lock(&mtx1);
+  pthread_mutex_lock(&mtx2);
+  Global++;
+  pthread_mutex_unlock(&mtx2);
+  pthread_mutex_unlock(&mtx1);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  sleep(1);
+  Global--;
+  return NULL;
+}
+
+int main() {
+  pthread_mutex_init(&mtx1, 0);
+  pthread_mutex_init(&mtx2, 0);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  pthread_mutex_destroy(&mtx1);
+  pthread_mutex_destroy(&mtx2);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 4 at {{.*}} by thread T2:
+// CHECK:   Previous write of size 4 at {{.*}} by thread T1
+// CHECK:                 (mutexes: write [[M1:M[0-9]+]], write [[M2:M[0-9]+]]):
+// CHECK:   Mutex [[M1]] created at:
+// CHECK:     #0 pthread_mutex_init
+// CHECK:     #1 main {{.*}}/mutexset4.cc:26
+// CHECK:   Mutex [[M2]] created at:
+// CHECK:     #0 pthread_mutex_init
+// CHECK:     #1 main {{.*}}/mutexset4.cc:27
+
diff --git a/lib/tsan/lit_tests/mutexset5.cc b/lib/tsan/lit_tests/mutexset5.cc
new file mode 100644
index 0000000..85fdadd
--- /dev/null
+++ b/lib/tsan/lit_tests/mutexset5.cc
@@ -0,0 +1,48 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+pthread_mutex_t mtx1;
+pthread_mutex_t mtx2;
+
+void *Thread1(void *x) {
+  sleep(1);
+  pthread_mutex_lock(&mtx1);
+  Global++;
+  pthread_mutex_unlock(&mtx1);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  pthread_mutex_lock(&mtx2);
+  Global--;
+  pthread_mutex_unlock(&mtx2);
+  return NULL;
+}
+
+int main() {
+  pthread_mutex_init(&mtx1, 0);
+  pthread_mutex_init(&mtx2, 0);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  pthread_mutex_destroy(&mtx1);
+  pthread_mutex_destroy(&mtx2);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 4 at {{.*}} by thread T1
+// CHECK:                              (mutexes: write [[M1:M[0-9]+]]):
+// CHECK:   Previous write of size 4 at {{.*}} by thread T2
+// CHECK:                              (mutexes: write [[M2:M[0-9]+]]):
+// CHECK:   Mutex [[M1]] created at:
+// CHECK:     #0 pthread_mutex_init
+// CHECK:     #1 main {{.*}}/mutexset5.cc:26
+// CHECK:   Mutex [[M2]] created at:
+// CHECK:     #0 pthread_mutex_init
+// CHECK:     #1 main {{.*}}/mutexset5.cc:27
+
diff --git a/lib/tsan/lit_tests/mutexset6.cc b/lib/tsan/lit_tests/mutexset6.cc
new file mode 100644
index 0000000..b28c21c
--- /dev/null
+++ b/lib/tsan/lit_tests/mutexset6.cc
@@ -0,0 +1,55 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+pthread_mutex_t mtx1;
+pthread_spinlock_t mtx2;
+pthread_rwlock_t mtx3;
+
+void *Thread1(void *x) {
+  sleep(1);
+  pthread_mutex_lock(&mtx1);
+  Global++;
+  pthread_mutex_unlock(&mtx1);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  pthread_mutex_lock(&mtx1);
+  pthread_mutex_unlock(&mtx1);
+  pthread_spin_lock(&mtx2);
+  pthread_rwlock_rdlock(&mtx3);
+  Global--;
+  pthread_spin_unlock(&mtx2);
+  pthread_rwlock_unlock(&mtx3);
+  return NULL;
+}
+
+int main() {
+  pthread_mutex_init(&mtx1, 0);
+  pthread_spin_init(&mtx2, 0);
+  pthread_rwlock_init(&mtx3, 0);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  pthread_mutex_destroy(&mtx1);
+  pthread_spin_destroy(&mtx2);
+  pthread_rwlock_destroy(&mtx3);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 4 at {{.*}} by thread T1
+// CHECK:                          (mutexes: write [[M1:M[0-9]+]]):
+// CHECK:   Previous write of size 4 at {{.*}} by thread T2
+// CHECK:               (mutexes: write [[M2:M[0-9]+]], read [[M3:M[0-9]+]]):
+// CHECK:   Mutex [[M1]] created at:
+// CHECK:     #1 main {{.*}}/mutexset6.cc:31
+// CHECK:   Mutex [[M2]] created at:
+// CHECK:     #1 main {{.*}}/mutexset6.cc:32
+// CHECK:   Mutex [[M3]] created at:
+// CHECK:     #1 main {{.*}}/mutexset6.cc:33
+
diff --git a/lib/tsan/lit_tests/mutexset7.cc b/lib/tsan/lit_tests/mutexset7.cc
new file mode 100644
index 0000000..141bde2
--- /dev/null
+++ b/lib/tsan/lit_tests/mutexset7.cc
@@ -0,0 +1,38 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+
+void *Thread1(void *x) {
+  sleep(1);
+  Global++;
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  pthread_mutex_t mtx;
+  pthread_mutex_init(&mtx, 0);
+  pthread_mutex_lock(&mtx);
+  Global--;
+  pthread_mutex_unlock(&mtx);
+  pthread_mutex_destroy(&mtx);
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: Write of size 4 at {{.*}} by thread T1:
+// CHECK: Previous write of size 4 at {{.*}} by thread T2
+// CHECK:                                      (mutexes: write [[M1:M[0-9]+]]):
+// CHECK: Mutex [[M1]] is already destroyed
+// CHECK-NOT: Mutex {{.*}} created at
+
diff --git a/lib/tsan/lit_tests/race_on_barrier.c b/lib/tsan/lit_tests/race_on_barrier.c
index 491201f..3e76f8b 100644
--- a/lib/tsan/lit_tests/race_on_barrier.c
+++ b/lib/tsan/lit_tests/race_on_barrier.c
@@ -14,7 +14,7 @@
 }
 
 void *Thread2(void *x) {
-  usleep(1000000);
+  sleep(1);
   pthread_barrier_wait(&B);
   return NULL;
 }
diff --git a/lib/tsan/lit_tests/race_on_heap.cc b/lib/tsan/lit_tests/race_on_heap.cc
index 855c309..62987bf 100644
--- a/lib/tsan/lit_tests/race_on_heap.cc
+++ b/lib/tsan/lit_tests/race_on_heap.cc
@@ -37,11 +37,11 @@
 // CHECK: addr=[[ADDR:0x[0-9,a-f]+]]
 // CHECK: WARNING: ThreadSanitizer: data race
 // ...
-// CHECK:   Location is heap block of size 99 at [[ADDR]] allocated by thread 1:
+// CHECK: Location is heap block of size 99 at [[ADDR]] allocated by thread T1:
 // CHCEKL     #0 malloc
 // CHECK:     #1 alloc
 // CHECK:     #2 AllocThread
 // ...
-// CHECK:   Thread 1 (tid={{.*}}, finished) created at:
+// CHECK:   Thread T1 (tid={{.*}}, finished) created at:
 // CHECK:     #0 pthread_create
 // CHECK:     #1 main
diff --git a/lib/tsan/lit_tests/race_on_mutex.c b/lib/tsan/lit_tests/race_on_mutex.c
index 6c6697d..de1c2d4 100644
--- a/lib/tsan/lit_tests/race_on_mutex.c
+++ b/lib/tsan/lit_tests/race_on_mutex.c
@@ -16,7 +16,7 @@
 }
 
 void *Thread2(void *x) {
-  usleep(1000000);
+  sleep(1);
   pthread_mutex_lock(&Mtx);
   Global = 43;
   pthread_mutex_unlock(&Mtx);
@@ -34,9 +34,9 @@
 }
 
 // CHECK:      WARNING: ThreadSanitizer: data race
-// CHECK-NEXT:   Read of size 1 at {{.*}} by thread 2:
+// CHECK-NEXT:   Read of size 1 at {{.*}} by thread T2:
 // CHECK-NEXT:     #0 pthread_mutex_lock
 // CHECK-NEXT:     #1 Thread2{{.*}} {{.*}}race_on_mutex.c:20{{(:3)?}} ({{.*}})
-// CHECK:        Previous write of size 1 at {{.*}} by thread 1:
+// CHECK:        Previous write of size 1 at {{.*}} by thread T1:
 // CHECK-NEXT:     #0 pthread_mutex_init {{.*}} ({{.*}})
 // CHECK-NEXT:     #1 Thread1{{.*}} {{.*}}race_on_mutex.c:11{{(:3)?}} ({{.*}})
diff --git a/lib/tsan/lit_tests/race_with_finished_thread.cc b/lib/tsan/lit_tests/race_with_finished_thread.cc
index 4008ecd..cc7834a 100644
--- a/lib/tsan/lit_tests/race_with_finished_thread.cc
+++ b/lib/tsan/lit_tests/race_with_finished_thread.cc
@@ -19,7 +19,7 @@
 }
 
 void *Thread2(void *x) {
-  usleep(1000*1000);
+  sleep(1);
   g_data = 43;
   return NULL;
 }
@@ -34,10 +34,10 @@
 }
 
 // CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 4 at {{.*}} by thread 2:
-// CHECK:   Previous write of size 4 at {{.*}} by thread 1:
+// CHECK:   Write of size 4 at {{.*}} by thread T2:
+// CHECK:   Previous write of size 4 at {{.*}} by thread T1:
 // CHECK:     #0 foobar
 // CHECK:     #1 Thread1
-// CHECK:   Thread 1 (tid={{.*}}, finished) created at:
+// CHECK:   Thread T1 (tid={{.*}}, finished) created at:
 // CHECK:     #0 pthread_create
 // CHECK:     #1 main
diff --git a/lib/tsan/lit_tests/simple_stack.c b/lib/tsan/lit_tests/simple_stack.c
index b130957..6de20cb 100644
--- a/lib/tsan/lit_tests/simple_stack.c
+++ b/lib/tsan/lit_tests/simple_stack.c
@@ -24,7 +24,7 @@
 }
 
 void *Thread1(void *x) {
-  usleep(1000000);
+  sleep(1);
   bar1();
   return NULL;
 }
@@ -48,19 +48,19 @@
 }
 
 // CHECK:      WARNING: ThreadSanitizer: data race
-// CHECK-NEXT:   Write of size 4 at {{.*}} by thread 1:
+// CHECK-NEXT:   Write of size 4 at {{.*}} by thread T1:
 // CHECK-NEXT:     #0 foo1{{.*}} {{.*}}simple_stack.c:9{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #1 bar1{{.*}} {{.*}}simple_stack.c:14{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 Thread1{{.*}} {{.*}}simple_stack.c:28{{(:3)?}} ({{.*}})
-// CHECK:        Previous read of size 4 at {{.*}} by thread 2:
+// CHECK:        Previous read of size 4 at {{.*}} by thread T2:
 // CHECK-NEXT:     #0 foo2{{.*}} {{.*}}simple_stack.c:18{{(:26)?}} ({{.*}})
 // CHECK-NEXT:     #1 bar2{{.*}} {{.*}}simple_stack.c:23{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 Thread2{{.*}} {{.*}}simple_stack.c:33{{(:3)?}} ({{.*}})
-// CHECK:        Thread 1 (tid={{.*}}, running) created at:
+// CHECK:        Thread T1 (tid={{.*}}, running) created at:
 // CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
 // CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:38{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:43{{(:3)?}} ({{.*}})
-// CHECK:        Thread 2 ({{.*}}) created at:
+// CHECK:        Thread T2 ({{.*}}) created at:
 // CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
 // CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:38{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:44{{(:3)?}} ({{.*}})
diff --git a/lib/tsan/lit_tests/simple_stack2.cc b/lib/tsan/lit_tests/simple_stack2.cc
index ed95c68..bf27a15 100644
--- a/lib/tsan/lit_tests/simple_stack2.cc
+++ b/lib/tsan/lit_tests/simple_stack2.cc
@@ -30,7 +30,7 @@
 }
 
 void *Thread1(void *x) {
-  usleep(1000000);
+  sleep(1);
   bar1();
   return NULL;
 }
@@ -43,7 +43,7 @@
 }
 
 // CHECK:      WARNING: ThreadSanitizer: data race
-// CHECK-NEXT:   Write of size 4 at {{.*}} by thread 1:
+// CHECK-NEXT:   Write of size 4 at {{.*}} by thread T1:
 // CHECK-NEXT:     #0 foo1{{.*}} {{.*}}simple_stack2.cc:9{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #1 bar1{{.*}} {{.*}}simple_stack2.cc:16{{(:3)?}} ({{.*}})
 // CHECK-NEXT:     #2 Thread1{{.*}} {{.*}}simple_stack2.cc:34{{(:3)?}} ({{.*}})
diff --git a/lib/tsan/lit_tests/sleep_sync.cc b/lib/tsan/lit_tests/sleep_sync.cc
index ed563c0..c3d47d3 100644
--- a/lib/tsan/lit_tests/sleep_sync.cc
+++ b/lib/tsan/lit_tests/sleep_sync.cc
@@ -5,7 +5,7 @@
 int X = 0;
 
 void MySleep() {
-  usleep(50*1000);
+  sleep(1);
 }
 
 void *Thread(void *p) {
@@ -25,6 +25,6 @@
 // CHECK: WARNING: ThreadSanitizer: data race
 // ...
 // CHECK:   As if synchronized via sleep:
-// CHECK-NEXT:     #0 usleep
+// CHECK-NEXT:     #0 sleep
 // CHECK-NEXT:     #1 MySleep
 // CHECK-NEXT:     #2 Thread
diff --git a/lib/tsan/lit_tests/sleep_sync2.cc b/lib/tsan/lit_tests/sleep_sync2.cc
index 9a51826..d9961bc 100644
--- a/lib/tsan/lit_tests/sleep_sync2.cc
+++ b/lib/tsan/lit_tests/sleep_sync2.cc
@@ -11,7 +11,7 @@
 
 int main() {
   pthread_t t;
-  usleep(100*1000);
+  sleep(1);
   pthread_create(&t, 0, Thread, 0);
   X = 43;
   pthread_join(t, 0);
diff --git a/lib/tsan/lit_tests/static_init5.cc b/lib/tsan/lit_tests/static_init5.cc
index 9d44eb2..1d0ed6d 100644
--- a/lib/tsan/lit_tests/static_init5.cc
+++ b/lib/tsan/lit_tests/static_init5.cc
@@ -17,7 +17,7 @@
 
 Cache *CreateCache() {
   pthread_t t;
-  pthread_create(&t, 0, AsyncInit, (void*)rand());
+  pthread_create(&t, 0, AsyncInit, (void*)(long)rand());
   void *res;
   pthread_join(t, &res);
   return (Cache*)res;
diff --git a/lib/tsan/lit_tests/test_output.sh b/lib/tsan/lit_tests/test_output.sh
index bd8573e..3798ff0 100755
--- a/lib/tsan/lit_tests/test_output.sh
+++ b/lib/tsan/lit_tests/test_output.sh
@@ -10,7 +10,7 @@
 CXX=clang++
 
 # TODO: add testing for all of -O0...-O3
-CFLAGS="-fthread-sanitizer -fPIE -O1 -g -fno-builtin -Wall"
+CFLAGS="-fsanitize=thread -fPIE -O1 -g -fno-builtin -Wall"
 LDFLAGS="-pie -lpthread -ldl $ROOTDIR/rtl/libtsan.a"
 
 test_file() {
diff --git a/lib/tsan/lit_tests/thread_name.cc b/lib/tsan/lit_tests/thread_name.cc
new file mode 100644
index 0000000..04081c1
--- /dev/null
+++ b/lib/tsan/lit_tests/thread_name.cc
@@ -0,0 +1,34 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+extern "C" void AnnotateThreadName(const char *f, int l, const char *name);
+
+int Global;
+
+void *Thread1(void *x) {
+  sleep(1);
+  AnnotateThreadName(__FILE__, __LINE__, "Thread1");
+  Global++;
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  AnnotateThreadName(__FILE__, __LINE__, "Thread2");
+  Global--;
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Thread T1 'Thread1'
+// CHECK:   Thread T2 'Thread2'
+
diff --git a/lib/tsan/lit_tests/write_in_reader_lock.cc b/lib/tsan/lit_tests/write_in_reader_lock.cc
new file mode 100644
index 0000000..db8bac3
--- /dev/null
+++ b/lib/tsan/lit_tests/write_in_reader_lock.cc
@@ -0,0 +1,35 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <unistd.h>
+
+pthread_rwlock_t rwlock;
+int GLOB;
+
+void *Thread1(void *p) {
+  (void)p;
+  pthread_rwlock_rdlock(&rwlock);
+  // Write under reader lock.
+  sleep(1);
+  GLOB++;
+  pthread_rwlock_unlock(&rwlock);
+  return 0;
+}
+
+int main(int argc, char *argv[]) {
+  pthread_rwlock_init(&rwlock, NULL);
+  pthread_rwlock_rdlock(&rwlock);
+  pthread_t t;
+  pthread_create(&t, 0, Thread1, 0);
+  volatile int x = GLOB;
+  (void)x;
+  pthread_rwlock_unlock(&rwlock);
+  pthread_join(t, 0);
+  pthread_rwlock_destroy(&rwlock);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 4 at {{.*}} by thread T1{{.*}}:
+// CHECK:     #0 Thread1(void*) {{.*}}write_in_reader_lock.cc:13
+// CHECK:   Previous read of size 4 at {{.*}} by main thread{{.*}}:
+// CHECK:     #0 main {{.*}}write_in_reader_lock.cc:23
diff --git a/lib/tsan/rtl/CMakeLists.txt b/lib/tsan/rtl/CMakeLists.txt
index 2538f99..f1a3d3b 100644
--- a/lib/tsan/rtl/CMakeLists.txt
+++ b/lib/tsan/rtl/CMakeLists.txt
@@ -8,7 +8,7 @@
   tsan_md5.cc
   tsan_mman.cc
   tsan_mutex.cc
-  tsan_printf.cc
+  tsan_mutexset.cc
   tsan_report.cc
   tsan_rtl.cc
   tsan_rtl_mutex.cc
diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
index 32ed91d..f8745ec 100644
--- a/lib/tsan/rtl/tsan_clock.cc
+++ b/lib/tsan/rtl/tsan_clock.cc
@@ -105,13 +105,6 @@
   release(dst);
 }
 
-void ThreadClock::Disable(unsigned tid) {
-  u64 c0 = clk_[tid];
-  for (uptr i = 0; i < kMaxTidInClock; i++)
-    clk_[i] = (u64)-1;
-  clk_[tid] = c0;
-}
-
 SyncClock::SyncClock()
   : clk_(MBlockClock) {
 }
diff --git a/lib/tsan/rtl/tsan_clock.h b/lib/tsan/rtl/tsan_clock.h
index 02ddb9a..0ee9374 100644
--- a/lib/tsan/rtl/tsan_clock.h
+++ b/lib/tsan/rtl/tsan_clock.h
@@ -61,8 +61,6 @@
       nclk_ = tid + 1;
   }
 
-  void Disable(unsigned tid);
-
   uptr size() const {
     return nclk_;
   }
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index 36dd4c9..e0c0473 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -24,6 +24,16 @@
 
 namespace __tsan {
 
+#ifdef TSAN_GO
+const bool kGoMode = true;
+const bool kCppMode = false;
+const char *const kTsanOptionsEnv = "GORACE";
+#else
+const bool kGoMode = false;
+const bool kCppMode = true;
+const char *const kTsanOptionsEnv = "TSAN_OPTIONS";
+#endif
+
 const int kTidBits = 13;
 const unsigned kMaxTid = 1 << kTidBits;
 const unsigned kMaxTidInClock = kMaxTid * 2;  // This includes msb 'freed' bit.
@@ -36,20 +46,23 @@
 #ifdef TSAN_SHADOW_COUNT
 # if TSAN_SHADOW_COUNT == 2 \
   || TSAN_SHADOW_COUNT == 4 || TSAN_SHADOW_COUNT == 8
-const unsigned kShadowCnt = TSAN_SHADOW_COUNT;
+const uptr kShadowCnt = TSAN_SHADOW_COUNT;
 # else
 #   error "TSAN_SHADOW_COUNT must be one of 2,4,8"
 # endif
 #else
 // Count of shadow values in a shadow cell.
-const unsigned kShadowCnt = 8;
+const uptr kShadowCnt = 4;
 #endif
 
 // That many user bytes are mapped onto a single shadow cell.
-const unsigned kShadowCell = 8;
+const uptr kShadowCell = 8;
 
 // Size of a single shadow value (u64).
-const unsigned kShadowSize = 8;
+const uptr kShadowSize = 8;
+
+// Shadow memory is kShadowMultiplier times larger than user memory.
+const uptr kShadowMultiplier = kShadowSize * kShadowCnt / kShadowCell;
 
 #if defined(TSAN_COLLECT_STATS) && TSAN_COLLECT_STATS
 const bool kCollectStats = true;
@@ -115,11 +128,23 @@
 }
 
 template<typename T>
-T RoundUp(T p, int align) {
+T RoundUp(T p, u64 align) {
   DCHECK_EQ(align & (align - 1), 0);
   return (T)(((u64)p + align - 1) & ~(align - 1));
 }
 
+template<typename T>
+T RoundDown(T p, u64 align) {
+  DCHECK_EQ(align & (align - 1), 0);
+  return (T)((u64)p & ~(align - 1));
+}
+
+// Zeroes the high part; returns the 'bits' least significant bits.
+template<typename T>
+T GetLsb(T v, int bits) {
+  return (T)((u64)v & ((1ull << bits) - 1));
+}
+
 struct MD5Hash {
   u64 hash[2];
   bool operator==(const MD5Hash &other) const;
diff --git a/lib/tsan/rtl/tsan_flags.cc b/lib/tsan/rtl/tsan_flags.cc
index a854d7a..25c370f 100644
--- a/lib/tsan/rtl/tsan_flags.cc
+++ b/lib/tsan/rtl/tsan_flags.cc
@@ -40,6 +40,8 @@
   f->enable_annotations = true;
   f->suppress_equal_stacks = true;
   f->suppress_equal_addresses = true;
+  f->suppress_java = false;
+  f->report_bugs = true;
   f->report_thread_leaks = true;
   f->report_destroy_locked = true;
   f->report_signal_unsafe = true;
@@ -47,7 +49,7 @@
   f->strip_path_prefix = "";
   f->suppressions = "";
   f->exitcode = 66;
-  f->log_fileno = 2;
+  f->log_path = "stderr";
   f->atexit_sleep_ms = 1000;
   f->verbosity = 0;
   f->profile_memory = "";
@@ -55,6 +57,7 @@
   f->stop_on_start = false;
   f->running_on_valgrind = false;
   f->external_symbolizer_path = "";
+  f->history_size = kGoMode ? 1 : 2;  // There are a lot of goroutines in Go.
 
   // Let a frontend override.
   OverrideFlags(f);
@@ -63,6 +66,8 @@
   ParseFlag(env, &f->enable_annotations, "enable_annotations");
   ParseFlag(env, &f->suppress_equal_stacks, "suppress_equal_stacks");
   ParseFlag(env, &f->suppress_equal_addresses, "suppress_equal_addresses");
+  ParseFlag(env, &f->suppress_java, "suppress_java");
+  ParseFlag(env, &f->report_bugs, "report_bugs");
   ParseFlag(env, &f->report_thread_leaks, "report_thread_leaks");
   ParseFlag(env, &f->report_destroy_locked, "report_destroy_locked");
   ParseFlag(env, &f->report_signal_unsafe, "report_signal_unsafe");
@@ -70,13 +75,26 @@
   ParseFlag(env, &f->strip_path_prefix, "strip_path_prefix");
   ParseFlag(env, &f->suppressions, "suppressions");
   ParseFlag(env, &f->exitcode, "exitcode");
-  ParseFlag(env, &f->log_fileno, "log_fileno");
+  ParseFlag(env, &f->log_path, "log_path");
   ParseFlag(env, &f->atexit_sleep_ms, "atexit_sleep_ms");
   ParseFlag(env, &f->verbosity, "verbosity");
   ParseFlag(env, &f->profile_memory, "profile_memory");
   ParseFlag(env, &f->flush_memory_ms, "flush_memory_ms");
   ParseFlag(env, &f->stop_on_start, "stop_on_start");
   ParseFlag(env, &f->external_symbolizer_path, "external_symbolizer_path");
+  ParseFlag(env, &f->history_size, "history_size");
+
+  if (!f->report_bugs) {
+    f->report_thread_leaks = false;
+    f->report_destroy_locked = false;
+    f->report_signal_unsafe = false;
+  }
+
+  if (f->history_size < 0 || f->history_size > 7) {
+    Printf("ThreadSanitizer: incorrect value for history_size"
+           " (must be [0..7])\n");
+    Die();
+  }
 }
 
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_flags.h b/lib/tsan/rtl/tsan_flags.h
index 929e6f5..2fdcc2f 100644
--- a/lib/tsan/rtl/tsan_flags.h
+++ b/lib/tsan/rtl/tsan_flags.h
@@ -31,6 +31,11 @@
   // Supress a race report if we've already output another race report
   // on the same address.
   bool suppress_equal_addresses;
+  // Suppress weird race reports that can be seen if a JVM is embedded
+  // into the process.
+  bool suppress_java;
+  // Turns off bug reporting entirely (useful for benchmarking).
+  bool report_bugs;
   // Report thread leaks at exit?
   bool report_thread_leaks;
   // Report destruction of a locked mutex?
@@ -47,8 +52,10 @@
   const char *suppressions;
   // Override exit status if something was reported.
   int exitcode;
-  // Log fileno (1 - stdout, 2 - stderr).
-  int log_fileno;
+  // Write logs to "log_path.pid".
+  // The special values are "stdout" and "stderr".
+  // The default is "stderr".
+  const char *log_path;
   // Sleep in main thread before exiting for that many ms
   // (useful to catch "at exit" races).
   int atexit_sleep_ms;
@@ -64,6 +71,12 @@
   bool running_on_valgrind;
   // Path to external symbolizer.
   const char *external_symbolizer_path;
+  // Per-thread history size, controls how many previous memory accesses
+  // are remembered per thread.  Possible values are [0..7].
+  // history_size=0 amounts to 32K memory accesses.  Each next value doubles
+  // the amount of memory accesses, up to history_size=7 that amounts to
+  // 4M memory accesses.  The default value is 2 (128K memory accesses).
+  int history_size;
 };
 
 Flags *flags();
diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc
index 4e09d8e..dad29cf 100644
--- a/lib/tsan/rtl/tsan_interceptors.cc
+++ b/lib/tsan/rtl/tsan_interceptors.cc
@@ -15,7 +15,7 @@
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
-#include "tsan_interceptors.h"
+#include "interception/interception.h"
 #include "tsan_interface.h"
 #include "tsan_platform.h"
 #include "tsan_rtl.h"
@@ -115,6 +115,7 @@
 };
 
 struct SignalContext {
+  int in_blocking_func;
   int int_signal_send;
   int pending_signal_count;
   SignalDesc pending_signals[kSigCount];
@@ -136,7 +137,14 @@
 
 static unsigned g_thread_finalize_key;
 
-static void process_pending_signals(ThreadState *thr);
+class ScopedInterceptor {
+ public:
+  ScopedInterceptor(ThreadState *thr, const char *fname, uptr pc);
+  ~ScopedInterceptor();
+ private:
+  ThreadState *const thr_;
+  const int in_rtl_;
+};
 
 ScopedInterceptor::ScopedInterceptor(ThreadState *thr, const char *fname,
                                      uptr pc)
@@ -156,28 +164,67 @@
   thr_->in_rtl--;
   if (thr_->in_rtl == 0) {
     FuncExit(thr_);
-    process_pending_signals(thr_);
+    ProcessPendingSignals(thr_);
   }
   CHECK_EQ(in_rtl_, thr_->in_rtl);
 }
 
+#define SCOPED_INTERCEPTOR_RAW(func, ...) \
+    ThreadState *thr = cur_thread(); \
+    StatInc(thr, StatInterceptor); \
+    StatInc(thr, StatInt_##func); \
+    const uptr caller_pc = GET_CALLER_PC(); \
+    ScopedInterceptor si(thr, #func, caller_pc); \
+    /* Subtract one from pc as we need current instruction address */ \
+    const uptr pc = __sanitizer::StackTrace::GetCurrentPc() - 1; \
+    (void)pc; \
+/**/
+
+#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
+    SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
+    if (REAL(func) == 0) { \
+      Printf("FATAL: ThreadSanitizer: failed to intercept %s\n", #func); \
+      Die(); \
+    } \
+    if (thr->in_rtl > 1) \
+      return REAL(func)(__VA_ARGS__); \
+/**/
+
+#define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__)
+#define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func)
+
+#define BLOCK_REAL(name) (BlockingCall(thr), REAL(name))
+
+struct BlockingCall {
+  explicit BlockingCall(ThreadState *thr)
+      : ctx(SigCtx(thr)) {
+    ctx->in_blocking_func++;
+  }
+
+  ~BlockingCall() {
+    ctx->in_blocking_func--;
+  }
+
+  SignalContext *ctx;
+};
+
 TSAN_INTERCEPTOR(unsigned, sleep, unsigned sec) {
   SCOPED_TSAN_INTERCEPTOR(sleep, sec);
-  unsigned res = sleep(sec);
+  unsigned res = BLOCK_REAL(sleep)(sec);
   AfterSleep(thr, pc);
   return res;
 }
 
 TSAN_INTERCEPTOR(int, usleep, long_t usec) {
   SCOPED_TSAN_INTERCEPTOR(usleep, usec);
-  int res = usleep(usec);
+  int res = BLOCK_REAL(usleep)(usec);
   AfterSleep(thr, pc);
   return res;
 }
 
 TSAN_INTERCEPTOR(int, nanosleep, void *req, void *rem) {
   SCOPED_TSAN_INTERCEPTOR(nanosleep, req, rem);
-  int res = nanosleep(req, rem);
+  int res = BLOCK_REAL(nanosleep)(req, rem);
   AfterSleep(thr, pc);
   return res;
 }
@@ -238,7 +285,6 @@
   {
     ScopedInRtl in_rtl;
     DestroyAndFree(atexit_ctx);
-    REAL(usleep)(flags()->atexit_sleep_ms * 1000);
   }
   int status = Finalize(cur_thread());
   if (status)
@@ -248,31 +294,216 @@
 TSAN_INTERCEPTOR(int, atexit, void (*f)()) {
   SCOPED_TSAN_INTERCEPTOR(atexit, f);
   return atexit_ctx->atexit(thr, pc, f);
-  return 0;
 }
 
 TSAN_INTERCEPTOR(void, longjmp, void *env, int val) {
   SCOPED_TSAN_INTERCEPTOR(longjmp, env, val);
-  TsanPrintf("ThreadSanitizer: longjmp() is not supported\n");
+  Printf("ThreadSanitizer: longjmp() is not supported\n");
   Die();
 }
 
 TSAN_INTERCEPTOR(void, siglongjmp, void *env, int val) {
   SCOPED_TSAN_INTERCEPTOR(siglongjmp, env, val);
-  TsanPrintf("ThreadSanitizer: siglongjmp() is not supported\n");
+  Printf("ThreadSanitizer: siglongjmp() is not supported\n");
   Die();
 }
 
-static uptr fd2addr(int fd) {
-  (void)fd;
-  static u64 addr;
-  return (uptr)&addr;
+enum FdType {
+  FdGlobal,  // Something we don't know about, global sync.
+  FdNone,  // Does not require any sync.
+  FdFile,
+  FdSock,
+  FdPipe,
+  FdEvent,  // see eventfd()
+  FdPoll
+};
+
+struct FdDesc {
+  FdType type;
+  u64 sync;
+};
+
+struct FdContext {
+  static const int kMaxFds = 10 * 1024;  // Everything else is synced globally.
+  FdDesc desc[kMaxFds];
+  // Addresses used for synchronization.
+  u64 fdglobal;
+  u64 fdfile;
+  u64 fdsock;
+  u64 fdpipe;
+  u64 fdpoll;
+  u64 fdevent;
+};
+
+static FdContext fdctx;
+
+static void FdInit() {
+  fdctx.desc[0].type = FdNone;
+  fdctx.desc[1].type = FdNone;
+  fdctx.desc[2].type = FdNone;
 }
 
-static uptr epollfd2addr(int fd) {
-  (void)fd;
-  static u64 addr;
-  return (uptr)&addr;
+// Maps fd to the address used as its sync object; 0 means "no sync".
+// Negative (invalid) fds get no sync; fds >= kMaxFds share fdglobal.
+static void *FdAddr(int fd) {
+  if (fd < 0)
+    return 0;
+  if (fd >= FdContext::kMaxFds)
+    return &fdctx.fdglobal;
+  switch (fdctx.desc[fd].type) {
+    case FdNone:   return 0;
+    case FdGlobal: return &fdctx.fdglobal;
+    case FdFile:   return &fdctx.fdfile;
+    case FdSock:   return &fdctx.fdsock;
+    case FdPipe:   return &fdctx.fdpipe;
+    case FdEvent:  return &fdctx.fdevent;
+    case FdPoll:   return &fdctx.fdpoll;
+  }
+  CHECK(0);
+  return 0;
+}
+
+// Acquire on fd's sync address; the sync read catches races with close().
+static void FdAcquire(ThreadState *thr, uptr pc, int fd) {
+  void *addr = FdAddr(fd);
+  DPrintf("#%d: FdAcquire(%d) -> %p\n", thr->tid, fd, addr);
+  if (addr)
+    Acquire(thr, pc, (uptr)addr);
+  if (fd >= 0 && fd < FdContext::kMaxFds)
+    MemoryRead8Byte(thr, pc, (uptr)&fdctx.desc[fd].sync);
+}
+
+// Release on fd's sync address; the sync read catches races with close().
+static void FdRelease(ThreadState *thr, uptr pc, int fd) {
+  void *addr = FdAddr(fd);
+  DPrintf("#%d: FdRelease(%d) -> %p\n", thr->tid, fd, addr);
+  if (addr)
+    Release(thr, pc, (uptr)addr);
+  if (fd >= 0 && fd < FdContext::kMaxFds)
+    MemoryRead8Byte(thr, pc, (uptr)&fdctx.desc[fd].sync);
+}
+
+// Resets fd's sync state on close; fds outside [0, kMaxFds) are ignored.
+static void FdClose(ThreadState *thr, uptr pc, int fd) {
+  if (fd < 0 || fd >= FdContext::kMaxFds)
+    return;
+  FdDesc *desc = &fdctx.desc[fd];
+  SyncVar *s = CTX()->synctab.GetAndRemove(thr, pc, (uptr)&desc->sync);
+  if (s) DestroyAndFree(s);
+  // FIXME(dvyukov): change to FdNone once we handle all fd operations.
+  desc->type = FdGlobal;
+  // To catch races between fd usage and close.
+  MemoryWrite8Byte(thr, pc, (uptr)&desc->sync);
+  // We need to clear it, because if we do not intercept any call out there
+  // that creates fd, we will hit false positives.
+  MemoryResetRange(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
+}
+
+static void FdFileCreate(ThreadState *thr, uptr pc, int fd) {
+  if (fd >= FdContext::kMaxFds)
+    return;
+  FdDesc *desc = &fdctx.desc[fd];
+  desc->type = FdFile;
+  // To catch races between fd usage and open.
+  MemoryRangeImitateWrite(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
+}
+
+static void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd) {
+  if (oldfd >= FdContext::kMaxFds || newfd >= FdContext::kMaxFds) {
+    if (oldfd < FdContext::kMaxFds) {
+      // FIXME(dvyukov): here we lose old sync object associated with the fd,
+      // this can lead to false positives.
+      FdDesc *odesc = &fdctx.desc[oldfd];
+      odesc->type = FdGlobal;
+    }
+    if (newfd < FdContext::kMaxFds) {
+      FdClose(thr, pc, newfd);
+      FdDesc *ndesc = &fdctx.desc[newfd];
+      ndesc->type = FdGlobal;
+    }
+    return;
+  }
+
+  FdClose(thr, pc, newfd);
+  FdDesc *ndesc = &fdctx.desc[newfd];
+  ndesc->type = FdFile;
+  // To catch races between fd usage and open.
+  MemoryRangeImitateWrite(thr, pc, (uptr)&ndesc->sync, sizeof(ndesc->sync));
+}
+
+// Tags both pipe ends FdPipe; if either is >= kMaxFds, falls back to FdGlobal.
+static void FdPipeCreate(ThreadState *thr, uptr pc, int rfd, int wfd) {
+  if (rfd >= FdContext::kMaxFds || wfd >= FdContext::kMaxFds) {
+    if (rfd < FdContext::kMaxFds) {
+      FdDesc *rdesc = &fdctx.desc[rfd];
+      rdesc->type = FdGlobal;
+    }
+    if (wfd < FdContext::kMaxFds) {
+      FdDesc *wdesc = &fdctx.desc[wfd];
+      wdesc->type = FdGlobal;
+    }
+    return;
+  }
+
+  FdDesc *rdesc = &fdctx.desc[rfd];
+  rdesc->type = FdPipe;
+  // To catch races between fd usage and open.
+  MemoryRangeImitateWrite(thr, pc, (uptr)&rdesc->sync, sizeof(rdesc->sync));
+
+  FdDesc *wdesc = &fdctx.desc[wfd];
+  wdesc->type = FdPipe;
+  // To catch races between fd usage and open.
+  MemoryRangeImitateWrite(thr, pc, (uptr)&wdesc->sync, sizeof(wdesc->sync));
+  DPrintf("#%d: FdCreatePipe(%d, %d)\n", thr->tid, rfd, wfd);
+}
+
+static void FdEventCreate(ThreadState *thr, uptr pc, int fd) {
+  if (fd >= FdContext::kMaxFds)
+    return;
+  FdDesc *desc = &fdctx.desc[fd];
+  desc->type = FdEvent;
+  // To catch races between fd usage and open.
+  MemoryRangeImitateWrite(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
+}
+
+static void FdPollCreate(ThreadState *thr, uptr pc, int fd) {
+  if (fd >= FdContext::kMaxFds)
+    return;
+  FdDesc *desc = &fdctx.desc[fd];
+  desc->type = FdPoll;
+  // To catch races between fd usage and open.
+  MemoryRangeImitateWrite(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
+}
+
+static void FdSocketCreate(ThreadState *thr, uptr pc, int fd) {
+  if (fd >= FdContext::kMaxFds)
+    return;
+  FdDesc *desc = &fdctx.desc[fd];
+  // It can be UDP socket, let's assume they are not used for synchronization.
+  desc->type = FdNone;
+  // To catch races between fd usage and open.
+  MemoryRangeImitateWrite(thr, pc, (uptr)&desc->sync, sizeof(desc->sync));
+}
+
+static void FdSocketAccept(ThreadState *thr, uptr pc, int fd, int newfd) {
+  if (fd < FdContext::kMaxFds) {
+    FdDesc *desc = &fdctx.desc[fd];
+    desc->type = FdNone;
+    MemoryRead8Byte(thr, pc, (uptr)&desc->sync);
+  }
+  if (newfd < FdContext::kMaxFds) {
+    FdDesc *desc = &fdctx.desc[newfd];
+    desc->type = FdSock;
+    MemoryWrite8Byte(thr, pc, (uptr)&desc->sync);
+  }
+}
+
+static void FdSocketConnect(ThreadState *thr, uptr pc, int fd) {
+  if (fd >= FdContext::kMaxFds)
+    return;
+  FdDesc *desc = &fdctx.desc[fd];
+  desc->type = FdSock;
+  MemoryWrite8Byte(thr, pc, (uptr)&desc->sync);
 }
 
 static uptr file2addr(char *path) {
@@ -297,6 +528,11 @@
   return p;
 }
 
+TSAN_INTERCEPTOR(void*, __libc_memalign, uptr align, uptr sz) {
+  SCOPED_TSAN_INTERCEPTOR(__libc_memalign, align, sz);
+  return user_alloc(thr, pc, sz, align);
+}
+
 TSAN_INTERCEPTOR(void*, calloc, uptr size, uptr n) {
   void *p = 0;
   {
@@ -553,13 +789,13 @@
 
 TSAN_INTERCEPTOR(void*, valloc, uptr sz) {
   SCOPED_TSAN_INTERCEPTOR(valloc, sz);
-  return user_alloc(thr, pc, sz, kPageSize);
+  return user_alloc(thr, pc, sz, GetPageSizeCached());
 }
 
 TSAN_INTERCEPTOR(void*, pvalloc, uptr sz) {
   SCOPED_TSAN_INTERCEPTOR(pvalloc, sz);
-  sz = RoundUp(sz, kPageSize);
-  return user_alloc(thr, pc, sz, kPageSize);
+  sz = RoundUp(sz, GetPageSizeCached());
+  return user_alloc(thr, pc, sz, GetPageSizeCached());
 }
 
 TSAN_INTERCEPTOR(int, posix_memalign, void **memptr, uptr align, uptr sz) {
@@ -569,28 +805,38 @@
 }
 
 // Used in thread-safe function static initialization.
-TSAN_INTERCEPTOR(int, __cxa_guard_acquire, char *m) {
-  SCOPED_TSAN_INTERCEPTOR(__cxa_guard_acquire, m);
-  int res = REAL(__cxa_guard_acquire)(m);
-  if (res) {
-    // This thread does the init.
-  } else {
-    Acquire(thr, pc, (uptr)m);
+extern "C" int INTERFACE_ATTRIBUTE __cxa_guard_acquire(atomic_uint32_t *g) {
+  SCOPED_INTERCEPTOR_RAW(__cxa_guard_acquire, g);
+  for (;;) {
+    u32 cmp = atomic_load(g, memory_order_acquire);
+    if (cmp == 0) {
+      if (atomic_compare_exchange_strong(g, &cmp, 1<<16, memory_order_relaxed))
+        return 1;
+    } else if (cmp == 1) {
+      Acquire(thr, pc, (uptr)g);
+      return 0;
+    } else {
+      internal_sched_yield();
+    }
   }
-  return res;
 }
 
-TSAN_INTERCEPTOR(void, __cxa_guard_release, char *m) {
-  SCOPED_TSAN_INTERCEPTOR(__cxa_guard_release, m);
-  Release(thr, pc, (uptr)m);
-  REAL(__cxa_guard_release)(m);
+extern "C" void INTERFACE_ATTRIBUTE __cxa_guard_release(atomic_uint32_t *g) {
+  SCOPED_INTERCEPTOR_RAW(__cxa_guard_release, g);
+  Release(thr, pc, (uptr)g);
+  atomic_store(g, 1, memory_order_release);
+}
+
+extern "C" void INTERFACE_ATTRIBUTE __cxa_guard_abort(atomic_uint32_t *g) {
+  SCOPED_INTERCEPTOR_RAW(__cxa_guard_abort, g);
+  atomic_store(g, 0, memory_order_relaxed);
 }
 
 static void thread_finalize(void *v) {
   uptr iter = (uptr)v;
   if (iter > 1) {
     if (pthread_setspecific(g_thread_finalize_key, (void*)(iter - 1))) {
-      TsanPrintf("ThreadSanitizer: failed to set thread key\n");
+      Printf("ThreadSanitizer: failed to set thread key\n");
       Die();
     }
     return;
@@ -623,7 +869,7 @@
     ThreadState *thr = cur_thread();
     ScopedInRtl in_rtl;
     if (pthread_setspecific(g_thread_finalize_key, (void*)4)) {
-      TsanPrintf("ThreadSanitizer: failed to set thread key\n");
+      Printf("ThreadSanitizer: failed to set thread key\n");
       Die();
     }
     while ((tid = atomic_load(&p->tid, memory_order_acquire)) == 0)
@@ -678,7 +924,7 @@
 TSAN_INTERCEPTOR(int, pthread_join, void *th, void **ret) {
   SCOPED_TSAN_INTERCEPTOR(pthread_join, th, ret);
   int tid = ThreadTid(thr, pc, (uptr)th);
-  int res = REAL(pthread_join)(th, ret);
+  int res = BLOCK_REAL(pthread_join)(th, ret);
   if (res == 0) {
     ThreadJoin(thr, pc, tid);
   }
@@ -981,7 +1227,7 @@
 
 TSAN_INTERCEPTOR(int, sem_wait, void *s) {
   SCOPED_TSAN_INTERCEPTOR(sem_wait, s);
-  int res = REAL(sem_wait)(s);
+  int res = BLOCK_REAL(sem_wait)(s);
   if (res == 0) {
     Acquire(thr, pc, (uptr)s);
   }
@@ -990,7 +1236,7 @@
 
 TSAN_INTERCEPTOR(int, sem_trywait, void *s) {
   SCOPED_TSAN_INTERCEPTOR(sem_trywait, s);
-  int res = REAL(sem_trywait)(s);
+  int res = BLOCK_REAL(sem_trywait)(s);
   if (res == 0) {
     Acquire(thr, pc, (uptr)s);
   }
@@ -999,7 +1245,7 @@
 
 TSAN_INTERCEPTOR(int, sem_timedwait, void *s, void *abstime) {
   SCOPED_TSAN_INTERCEPTOR(sem_timedwait, s, abstime);
-  int res = REAL(sem_timedwait)(s, abstime);
+  int res = BLOCK_REAL(sem_timedwait)(s, abstime);
   if (res == 0) {
     Acquire(thr, pc, (uptr)s);
   }
@@ -1022,11 +1268,129 @@
   return res;
 }
 
+TSAN_INTERCEPTOR(int, open, const char *name, int flags, int mode) {
+  SCOPED_TSAN_INTERCEPTOR(open, name, flags, mode);
+  int fd = REAL(open)(name, flags, mode);
+  if (fd >= 0)
+    FdFileCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, creat, const char *name, int mode) {
+  SCOPED_TSAN_INTERCEPTOR(creat, name, mode);
+  int fd = REAL(creat)(name, mode);
+  if (fd >= 0)
+    FdFileCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, dup, int oldfd) {
+  SCOPED_TSAN_INTERCEPTOR(dup, oldfd);
+  int newfd = REAL(dup)(oldfd);
+  if (newfd >= 0 && newfd != oldfd)
+    FdDup(thr, pc, oldfd, newfd);
+  return newfd;
+}
+
+TSAN_INTERCEPTOR(int, dup2, int oldfd, int newfd) {
+  SCOPED_TSAN_INTERCEPTOR(dup2, oldfd, newfd);
+  int newfd2 = REAL(dup2)(oldfd, newfd);
+  if (newfd2 >= 0 && newfd2 != oldfd)
+    FdDup(thr, pc, oldfd, newfd2);
+  return newfd2;
+}
+
+TSAN_INTERCEPTOR(int, dup3, int oldfd, int newfd, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(dup3, oldfd, newfd, flags);
+  int newfd2 = REAL(dup3)(oldfd, newfd, flags);
+  if (newfd2 >= 0 && newfd2 != oldfd)
+    FdDup(thr, pc, oldfd, newfd2);
+  return newfd2;
+}
+
+TSAN_INTERCEPTOR(int, eventfd, unsigned initval, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(eventfd, initval, flags);
+  int fd = REAL(eventfd)(initval, flags);
+  if (fd >= 0)
+    FdEventCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, socket, int domain, int type, int protocol) {
+  SCOPED_TSAN_INTERCEPTOR(socket, domain, type, protocol);
+  int fd = REAL(socket)(domain, type, protocol);
+  if (fd >= 0)
+    FdSocketCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, connect, int fd, void *addr, unsigned addrlen) {
+  SCOPED_TSAN_INTERCEPTOR(connect, fd, addr, addrlen);
+  int res = REAL(connect)(fd, addr, addrlen);
+  if (res == 0)
+    FdSocketConnect(thr, pc, fd);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, accept, int fd, void *addr, unsigned *addrlen) {
+  SCOPED_TSAN_INTERCEPTOR(accept, fd, addr, addrlen);
+  int fd2 = REAL(accept)(fd, addr, addrlen);
+  if (fd2 >= 0)
+    FdSocketAccept(thr, pc, fd, fd2);
+  return fd2;
+}
+
+TSAN_INTERCEPTOR(int, accept4, int fd, void *addr, unsigned *addrlen, int f) {
+  SCOPED_TSAN_INTERCEPTOR(accept4, fd, addr, addrlen, f);
+  int fd2 = REAL(accept4)(fd, addr, addrlen, f);
+  if (fd2 >= 0)
+    FdSocketAccept(thr, pc, fd, fd2);
+  return fd2;
+}
+
+TSAN_INTERCEPTOR(int, epoll_create, int size) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_create, size);
+  int fd = REAL(epoll_create)(size);
+  if (fd >= 0)
+    FdPollCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, epoll_create1, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_create1, flags);
+  int fd = REAL(epoll_create1)(flags);
+  if (fd >= 0)
+    FdPollCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, close, int fd) {
+  SCOPED_TSAN_INTERCEPTOR(close, fd);
+  FdClose(thr, pc, fd);
+  return REAL(close)(fd);
+}
+
+TSAN_INTERCEPTOR(int, pipe, int *pipefd) {
+  SCOPED_TSAN_INTERCEPTOR(pipe, pipefd);
+  int res = REAL(pipe)(pipefd);
+  if (res == 0)
+    FdPipeCreate(thr, pc, pipefd[0], pipefd[1]);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pipe2, int *pipefd, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(pipe2, pipefd, flags);
+  int res = REAL(pipe2)(pipefd, flags);
+  if (res == 0)
+    FdPipeCreate(thr, pc, pipefd[0], pipefd[1]);
+  return res;
+}
+
 TSAN_INTERCEPTOR(long_t, read, int fd, void *buf, long_t sz) {
   SCOPED_TSAN_INTERCEPTOR(read, fd, buf, sz);
   int res = REAL(read)(fd, buf, sz);
   if (res >= 0) {
-    Acquire(thr, pc, fd2addr(fd));
+    FdAcquire(thr, pc, fd);
   }
   return res;
 }
@@ -1035,7 +1399,7 @@
   SCOPED_TSAN_INTERCEPTOR(pread, fd, buf, sz, off);
   int res = REAL(pread)(fd, buf, sz, off);
   if (res >= 0) {
-    Acquire(thr, pc, fd2addr(fd));
+    FdAcquire(thr, pc, fd);
   }
   return res;
 }
@@ -1044,7 +1408,7 @@
   SCOPED_TSAN_INTERCEPTOR(pread64, fd, buf, sz, off);
   int res = REAL(pread64)(fd, buf, sz, off);
   if (res >= 0) {
-    Acquire(thr, pc, fd2addr(fd));
+    FdAcquire(thr, pc, fd);
   }
   return res;
 }
@@ -1053,7 +1417,7 @@
   SCOPED_TSAN_INTERCEPTOR(readv, fd, vec, cnt);
   int res = REAL(readv)(fd, vec, cnt);
   if (res >= 0) {
-    Acquire(thr, pc, fd2addr(fd));
+    FdAcquire(thr, pc, fd);
   }
   return res;
 }
@@ -1062,56 +1426,56 @@
   SCOPED_TSAN_INTERCEPTOR(preadv64, fd, vec, cnt, off);
   int res = REAL(preadv64)(fd, vec, cnt, off);
   if (res >= 0) {
-    Acquire(thr, pc, fd2addr(fd));
+    FdAcquire(thr, pc, fd);
   }
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, write, int fd, void *buf, long_t sz) {
   SCOPED_TSAN_INTERCEPTOR(write, fd, buf, sz);
-  Release(thr, pc, fd2addr(fd));
+  FdRelease(thr, pc, fd);
   int res = REAL(write)(fd, buf, sz);
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, pwrite, int fd, void *buf, long_t sz, unsigned off) {
   SCOPED_TSAN_INTERCEPTOR(pwrite, fd, buf, sz, off);
-  Release(thr, pc, fd2addr(fd));
+  FdRelease(thr, pc, fd);
   int res = REAL(pwrite)(fd, buf, sz, off);
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, pwrite64, int fd, void *buf, long_t sz, u64 off) {
   SCOPED_TSAN_INTERCEPTOR(pwrite64, fd, buf, sz, off);
-  Release(thr, pc, fd2addr(fd));
+  FdRelease(thr, pc, fd);
   int res = REAL(pwrite64)(fd, buf, sz, off);
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, writev, int fd, void *vec, int cnt) {
   SCOPED_TSAN_INTERCEPTOR(writev, fd, vec, cnt);
-  Release(thr, pc, fd2addr(fd));
+  FdRelease(thr, pc, fd);
   int res = REAL(writev)(fd, vec, cnt);
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, pwritev64, int fd, void *vec, int cnt, u64 off) {
   SCOPED_TSAN_INTERCEPTOR(pwritev64, fd, vec, cnt, off);
-  Release(thr, pc, fd2addr(fd));
+  FdRelease(thr, pc, fd);
   int res = REAL(pwritev64)(fd, vec, cnt, off);
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, send, int fd, void *buf, long_t len, int flags) {
   SCOPED_TSAN_INTERCEPTOR(send, fd, buf, len, flags);
-  Release(thr, pc, fd2addr(fd));
+  FdRelease(thr, pc, fd);
   int res = REAL(send)(fd, buf, len, flags);
   return res;
 }
 
 TSAN_INTERCEPTOR(long_t, sendmsg, int fd, void *msg, int flags) {
   SCOPED_TSAN_INTERCEPTOR(sendmsg, fd, msg, flags);
-  Release(thr, pc, fd2addr(fd));
+  FdRelease(thr, pc, fd);
   int res = REAL(sendmsg)(fd, msg, flags);
   return res;
 }
@@ -1120,7 +1484,7 @@
   SCOPED_TSAN_INTERCEPTOR(recv, fd, buf, len, flags);
   int res = REAL(recv)(fd, buf, len, flags);
   if (res >= 0) {
-    Acquire(thr, pc, fd2addr(fd));
+    FdAcquire(thr, pc, fd);
   }
   return res;
 }
@@ -1129,7 +1493,7 @@
   SCOPED_TSAN_INTERCEPTOR(recvmsg, fd, msg, flags);
   int res = REAL(recvmsg)(fd, msg, flags);
   if (res >= 0) {
-    Acquire(thr, pc, fd2addr(fd));
+    FdAcquire(thr, pc, fd);
   }
   return res;
 }
@@ -1183,7 +1547,7 @@
 TSAN_INTERCEPTOR(int, epoll_ctl, int epfd, int op, int fd, void *ev) {
   SCOPED_TSAN_INTERCEPTOR(epoll_ctl, epfd, op, fd, ev);
   if (op == EPOLL_CTL_ADD) {
-    Release(thr, pc, epollfd2addr(epfd));
+    FdRelease(thr, pc, epfd);
   }
   int res = REAL(epoll_ctl)(epfd, op, fd, ev);
   return res;
@@ -1191,13 +1555,19 @@
 
 TSAN_INTERCEPTOR(int, epoll_wait, int epfd, void *ev, int cnt, int timeout) {
   SCOPED_TSAN_INTERCEPTOR(epoll_wait, epfd, ev, cnt, timeout);
-  int res = REAL(epoll_wait)(epfd, ev, cnt, timeout);
+  int res = BLOCK_REAL(epoll_wait)(epfd, ev, cnt, timeout);
   if (res > 0) {
-    Acquire(thr, pc, epollfd2addr(epfd));
+    FdAcquire(thr, pc, epfd);
   }
   return res;
 }
 
+TSAN_INTERCEPTOR(int, poll, void *fds, long_t nfds, int timeout) {
+  SCOPED_TSAN_INTERCEPTOR(poll, fds, nfds, timeout);
+  int res = BLOCK_REAL(poll)(fds, nfds, timeout);
+  return res;
+}
+
 static void ALWAYS_INLINE rtl_generic_sighandler(bool sigact, int sig,
     my_siginfo_t *info, void *ctx) {
   ThreadState *thr = cur_thread();
@@ -1205,7 +1575,12 @@
   // Don't mess with synchronous signals.
   if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
       sig == SIGABRT || sig == SIGFPE || sig == SIGPIPE ||
-      (sctx && sig == sctx->int_signal_send)) {
+      // If we are sending signal to ourselves, we must process it now.
+      (sctx && sig == sctx->int_signal_send) ||
+      // If we are in blocking function, we can safely process it now
+      // (but check if we are in a recursive interceptor,
+      // i.e. pthread_join()->munmap()).
+      (sctx && sctx->in_blocking_func == 1 && thr->in_rtl == 1)) {
     CHECK(thr->in_rtl == 0 || thr->in_rtl == 1);
     int in_rtl = thr->in_rtl;
     thr->in_rtl = 0;
@@ -1319,7 +1694,44 @@
   return res;
 }
 
-static void process_pending_signals(ThreadState *thr) {
+TSAN_INTERCEPTOR(int, gettimeofday, void *tv, void *tz) {
+  SCOPED_TSAN_INTERCEPTOR(gettimeofday, tv, tz);
+  // It's intercepted merely to process pending signals.
+  return REAL(gettimeofday)(tv, tz);
+}
+
+// Linux kernel has a bug that leads to kernel deadlock if a process
+// maps TBs of memory and then calls mlock().
+static void MlockIsUnsupported() {
+  static atomic_uint8_t printed;
+  if (atomic_exchange(&printed, 1, memory_order_relaxed))
+    return;
+  Printf("INFO: ThreadSanitizer ignores mlock/mlockall/munlock/munlockall\n");
+}
+
+TSAN_INTERCEPTOR(int, mlock, const void *addr, uptr len) {
+  MlockIsUnsupported();
+  return 0;
+}
+
+TSAN_INTERCEPTOR(int, munlock, const void *addr, uptr len) {
+  MlockIsUnsupported();
+  return 0;
+}
+
+TSAN_INTERCEPTOR(int, mlockall, int flags) {
+  MlockIsUnsupported();
+  return 0;
+}
+
+TSAN_INTERCEPTOR(int, munlockall, void) {
+  MlockIsUnsupported();
+  return 0;
+}
+
+namespace __tsan {
+
+void ProcessPendingSignals(ThreadState *thr) {
   CHECK_EQ(thr->in_rtl, 0);
   SignalContext *sctx = SigCtx(thr);
   if (sctx == 0 || sctx->pending_signal_count == 0 || thr->in_signal_handler)
@@ -1344,7 +1756,7 @@
           sigactions[sig].sa_sigaction(sig, &signal->siginfo, &signal->ctx);
         else
           sigactions[sig].sa_handler(sig);
-        if (errno != 0) {
+        if (flags()->report_bugs && errno != 0) {
           ScopedInRtl in_rtl;
           __tsan::StackTrace stack;
           uptr pc = signal->sigaction ?
@@ -1366,7 +1778,10 @@
   thr->in_signal_handler = false;
 }
 
-namespace __tsan {
+static void unreachable() {
+  Printf("FATAL: ThreadSanitizer: unreachable called\n");
+  Die();
+}
 
 void InitializeInterceptors() {
   CHECK_GT(cur_thread()->in_rtl, 0);
@@ -1380,6 +1795,7 @@
   TSAN_INTERCEPT(siglongjmp);
 
   TSAN_INTERCEPT(malloc);
+  TSAN_INTERCEPT(__libc_memalign);
   TSAN_INTERCEPT(calloc);
   TSAN_INTERCEPT(realloc);
   TSAN_INTERCEPT(free);
@@ -1408,9 +1824,6 @@
   TSAN_INTERCEPT(strncpy);
   TSAN_INTERCEPT(strstr);
 
-  TSAN_INTERCEPT(__cxa_guard_acquire);
-  TSAN_INTERCEPT(__cxa_guard_release);
-
   TSAN_INTERCEPT(pthread_create);
   TSAN_INTERCEPT(pthread_join);
   TSAN_INTERCEPT(pthread_detach);
@@ -1459,6 +1872,22 @@
   TSAN_INTERCEPT(sem_post);
   TSAN_INTERCEPT(sem_getvalue);
 
+  TSAN_INTERCEPT(open);
+  TSAN_INTERCEPT(creat);
+  TSAN_INTERCEPT(dup);
+  TSAN_INTERCEPT(dup2);
+  TSAN_INTERCEPT(dup3);
+  TSAN_INTERCEPT(eventfd);
+  TSAN_INTERCEPT(socket);
+  TSAN_INTERCEPT(connect);
+  TSAN_INTERCEPT(accept);
+  TSAN_INTERCEPT(accept4);
+  TSAN_INTERCEPT(epoll_create);
+  TSAN_INTERCEPT(epoll_create1);
+  TSAN_INTERCEPT(close);
+  TSAN_INTERCEPT(pipe);
+  TSAN_INTERCEPT(pipe2);
+
   TSAN_INTERCEPT(read);
   TSAN_INTERCEPT(pread);
   TSAN_INTERCEPT(pread64);
@@ -1484,6 +1913,7 @@
 
   TSAN_INTERCEPT(epoll_ctl);
   TSAN_INTERCEPT(epoll_wait);
+  TSAN_INTERCEPT(poll);
 
   TSAN_INTERCEPT(sigaction);
   TSAN_INTERCEPT(signal);
@@ -1493,19 +1923,30 @@
   TSAN_INTERCEPT(sleep);
   TSAN_INTERCEPT(usleep);
   TSAN_INTERCEPT(nanosleep);
+  TSAN_INTERCEPT(gettimeofday);
 
+  TSAN_INTERCEPT(mlock);
+  TSAN_INTERCEPT(munlock);
+  TSAN_INTERCEPT(mlockall);
+  TSAN_INTERCEPT(munlockall);
+
+  // REAL(atexit) must be set up manually: interceptors check that the function
+  // is resolved, but atexit is emitted directly into the module and can't be.
+  REAL(atexit) = (int(*)(void(*)()))unreachable;
   atexit_ctx = new(internal_alloc(MBlockAtExit, sizeof(AtExitContext)))
       AtExitContext();
 
   if (__cxa_atexit(&finalize, 0, 0)) {
-    TsanPrintf("ThreadSanitizer: failed to setup atexit callback\n");
+    Printf("ThreadSanitizer: failed to setup atexit callback\n");
     Die();
   }
 
   if (pthread_key_create(&g_thread_finalize_key, &thread_finalize)) {
-    TsanPrintf("ThreadSanitizer: failed to create thread key\n");
+    Printf("ThreadSanitizer: failed to create thread key\n");
     Die();
   }
+
+  FdInit();
 }
 
 void internal_start_thread(void(*func)(void *arg), void *arg) {
diff --git a/lib/tsan/rtl/tsan_interceptors.h b/lib/tsan/rtl/tsan_interceptors.h
deleted file mode 100644
index 34b1823..0000000
--- a/lib/tsan/rtl/tsan_interceptors.h
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- tsan_interceptors.h -------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TSAN_INTERCEPTORS_H
-#define TSAN_INTERCEPTORS_H
-
-#include "interception/interception.h"
-#include "sanitizer_common/sanitizer_stacktrace.h"
-#include "tsan_rtl.h"
-
-namespace __tsan {
-
-class ScopedInterceptor {
- public:
-  ScopedInterceptor(ThreadState *thr, const char *fname, uptr pc);
-  ~ScopedInterceptor();
- private:
-  ThreadState *const thr_;
-  const int in_rtl_;
-};
-
-#define SCOPED_INTERCEPTOR_RAW(func, ...) \
-    ThreadState *thr = cur_thread(); \
-    StatInc(thr, StatInterceptor); \
-    StatInc(thr, StatInt_##func); \
-    const uptr caller_pc = GET_CALLER_PC(); \
-    ScopedInterceptor si(thr, #func, caller_pc); \
-    /* Subtract one from pc as we need current instruction address */ \
-    const uptr pc = __sanitizer::StackTrace::GetCurrentPc() - 1; \
-    (void)pc; \
-/**/
-
-#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
-    SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
-    if (thr->in_rtl > 1) \
-      return REAL(func)(__VA_ARGS__); \
-/**/
-
-#define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__)
-#define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func)
-
-}  // namespace __tsan
-
-#endif  // TSAN_INTERCEPTORS_H
diff --git a/lib/tsan/rtl/tsan_interface.h b/lib/tsan/rtl/tsan_interface.h
index ed21ec6..7480fc8 100644
--- a/lib/tsan/rtl/tsan_interface.h
+++ b/lib/tsan/rtl/tsan_interface.h
@@ -16,6 +16,8 @@
 #ifndef TSAN_INTERFACE_H
 #define TSAN_INTERFACE_H
 
+#include <sanitizer/common_interface_defs.h>
+
 // This header should NOT include any other headers.
 // All functions in this header are extern "C" and start with __tsan_.
 
@@ -25,24 +27,30 @@
 
 // This function should be called at the very beginning of the process,
 // before any instrumented code is executed and before any call to malloc.
-void __tsan_init();
+void __tsan_init() SANITIZER_INTERFACE_ATTRIBUTE;
 
-void __tsan_read1(void *addr);
-void __tsan_read2(void *addr);
-void __tsan_read4(void *addr);
-void __tsan_read8(void *addr);
-void __tsan_read16(void *addr);
+void __tsan_read1(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_read2(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_read4(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_read8(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_read16(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
 
-void __tsan_write1(void *addr);
-void __tsan_write2(void *addr);
-void __tsan_write4(void *addr);
-void __tsan_write8(void *addr);
-void __tsan_write16(void *addr);
+void __tsan_write1(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_write2(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_write4(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_write8(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_write16(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
 
-void __tsan_vptr_update(void **vptr_p, void *new_val);
+void __tsan_vptr_update(void **vptr_p, void *new_val)
+    SANITIZER_INTERFACE_ATTRIBUTE;
 
-void __tsan_func_entry(void *call_pc);
-void __tsan_func_exit();
+void __tsan_func_entry(void *call_pc) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_func_exit() SANITIZER_INTERFACE_ATTRIBUTE;
+
+void __tsan_read_range(void *addr, unsigned long size)  // NOLINT
+    SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_write_range(void *addr, unsigned long size)  // NOLINT
+    SANITIZER_INTERFACE_ATTRIBUTE;
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/lib/tsan/rtl/tsan_interface_ann.cc b/lib/tsan/rtl/tsan_interface_ann.cc
index 975cdba..51ebbf2 100644
--- a/lib/tsan/rtl/tsan_interface_ann.cc
+++ b/lib/tsan/rtl/tsan_interface_ann.cc
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "tsan_interface_ann.h"
 #include "tsan_mutex.h"
@@ -159,48 +160,50 @@
 using namespace __tsan;  // NOLINT
 
 extern "C" {
-void AnnotateHappensBefore(char *f, int l, uptr addr) {
+void INTERFACE_ATTRIBUTE AnnotateHappensBefore(char *f, int l, uptr addr) {
   SCOPED_ANNOTATION(AnnotateHappensBefore);
   Release(cur_thread(), CALLERPC, addr);
 }
 
-void AnnotateHappensAfter(char *f, int l, uptr addr) {
+void INTERFACE_ATTRIBUTE AnnotateHappensAfter(char *f, int l, uptr addr) {
   SCOPED_ANNOTATION(AnnotateHappensAfter);
   Acquire(cur_thread(), CALLERPC, addr);
 }
 
-void AnnotateCondVarSignal(char *f, int l, uptr cv) {
+void INTERFACE_ATTRIBUTE AnnotateCondVarSignal(char *f, int l, uptr cv) {
   SCOPED_ANNOTATION(AnnotateCondVarSignal);
 }
 
-void AnnotateCondVarSignalAll(char *f, int l, uptr cv) {
+void INTERFACE_ATTRIBUTE AnnotateCondVarSignalAll(char *f, int l, uptr cv) {
   SCOPED_ANNOTATION(AnnotateCondVarSignalAll);
 }
 
-void AnnotateMutexIsNotPHB(char *f, int l, uptr mu) {
+void INTERFACE_ATTRIBUTE AnnotateMutexIsNotPHB(char *f, int l, uptr mu) {
   SCOPED_ANNOTATION(AnnotateMutexIsNotPHB);
 }
 
-void AnnotateCondVarWait(char *f, int l, uptr cv, uptr lock) {
+void INTERFACE_ATTRIBUTE AnnotateCondVarWait(char *f, int l, uptr cv,
+                                             uptr lock) {
   SCOPED_ANNOTATION(AnnotateCondVarWait);
 }
 
-void AnnotateRWLockCreate(char *f, int l, uptr m) {
+void INTERFACE_ATTRIBUTE AnnotateRWLockCreate(char *f, int l, uptr m) {
   SCOPED_ANNOTATION(AnnotateRWLockCreate);
   MutexCreate(thr, pc, m, true, true, false);
 }
 
-void AnnotateRWLockCreateStatic(char *f, int l, uptr m) {
+void INTERFACE_ATTRIBUTE AnnotateRWLockCreateStatic(char *f, int l, uptr m) {
   SCOPED_ANNOTATION(AnnotateRWLockCreateStatic);
   MutexCreate(thr, pc, m, true, true, true);
 }
 
-void AnnotateRWLockDestroy(char *f, int l, uptr m) {
+void INTERFACE_ATTRIBUTE AnnotateRWLockDestroy(char *f, int l, uptr m) {
   SCOPED_ANNOTATION(AnnotateRWLockDestroy);
   MutexDestroy(thr, pc, m);
 }
 
-void AnnotateRWLockAcquired(char *f, int l, uptr m, uptr is_w) {
+void INTERFACE_ATTRIBUTE AnnotateRWLockAcquired(char *f, int l, uptr m,
+                                                uptr is_w) {
   SCOPED_ANNOTATION(AnnotateRWLockAcquired);
   if (is_w)
     MutexLock(thr, pc, m);
@@ -208,7 +211,8 @@
     MutexReadLock(thr, pc, m);
 }
 
-void AnnotateRWLockReleased(char *f, int l, uptr m, uptr is_w) {
+void INTERFACE_ATTRIBUTE AnnotateRWLockReleased(char *f, int l, uptr m,
+                                                uptr is_w) {
   SCOPED_ANNOTATION(AnnotateRWLockReleased);
   if (is_w)
     MutexUnlock(thr, pc, m);
@@ -216,31 +220,32 @@
     MutexReadUnlock(thr, pc, m);
 }
 
-void AnnotateTraceMemory(char *f, int l, uptr mem) {
+void INTERFACE_ATTRIBUTE AnnotateTraceMemory(char *f, int l, uptr mem) {
   SCOPED_ANNOTATION(AnnotateTraceMemory);
 }
 
-void AnnotateFlushState(char *f, int l) {
+void INTERFACE_ATTRIBUTE AnnotateFlushState(char *f, int l) {
   SCOPED_ANNOTATION(AnnotateFlushState);
 }
 
-void AnnotateNewMemory(char *f, int l, uptr mem, uptr size) {
+void INTERFACE_ATTRIBUTE AnnotateNewMemory(char *f, int l, uptr mem,
+                                           uptr size) {
   SCOPED_ANNOTATION(AnnotateNewMemory);
 }
 
-void AnnotateNoOp(char *f, int l, uptr mem) {
+void INTERFACE_ATTRIBUTE AnnotateNoOp(char *f, int l, uptr mem) {
   SCOPED_ANNOTATION(AnnotateNoOp);
 }
 
 static void ReportMissedExpectedRace(ExpectRace *race) {
-  TsanPrintf("==================\n");
-  TsanPrintf("WARNING: ThreadSanitizer: missed expected data race\n");
-  TsanPrintf("  %s addr=%zx %s:%d\n",
+  Printf("==================\n");
+  Printf("WARNING: ThreadSanitizer: missed expected data race\n");
+  Printf("  %s addr=%zx %s:%d\n",
       race->desc, race->addr, race->file, race->line);
-  TsanPrintf("==================\n");
+  Printf("==================\n");
 }
 
-void AnnotateFlushExpectedRaces(char *f, int l) {
+void INTERFACE_ATTRIBUTE AnnotateFlushExpectedRaces(char *f, int l) {
   SCOPED_ANNOTATION(AnnotateFlushExpectedRaces);
   Lock lock(&dyn_ann_ctx->mtx);
   while (dyn_ann_ctx->expect.next != &dyn_ann_ctx->expect) {
@@ -255,32 +260,39 @@
   }
 }
 
-void AnnotateEnableRaceDetection(char *f, int l, int enable) {
+void INTERFACE_ATTRIBUTE AnnotateEnableRaceDetection(
+    char *f, int l, int enable) {
   SCOPED_ANNOTATION(AnnotateEnableRaceDetection);
   // FIXME: Reconsider this functionality later. It may be irrelevant.
 }
 
-void AnnotateMutexIsUsedAsCondVar(char *f, int l, uptr mu) {
+void INTERFACE_ATTRIBUTE AnnotateMutexIsUsedAsCondVar(
+    char *f, int l, uptr mu) {
   SCOPED_ANNOTATION(AnnotateMutexIsUsedAsCondVar);
 }
 
-void AnnotatePCQGet(char *f, int l, uptr pcq) {
+void INTERFACE_ATTRIBUTE AnnotatePCQGet(
+    char *f, int l, uptr pcq) {
   SCOPED_ANNOTATION(AnnotatePCQGet);
 }
 
-void AnnotatePCQPut(char *f, int l, uptr pcq) {
+void INTERFACE_ATTRIBUTE AnnotatePCQPut(
+    char *f, int l, uptr pcq) {
   SCOPED_ANNOTATION(AnnotatePCQPut);
 }
 
-void AnnotatePCQDestroy(char *f, int l, uptr pcq) {
+void INTERFACE_ATTRIBUTE AnnotatePCQDestroy(
+    char *f, int l, uptr pcq) {
   SCOPED_ANNOTATION(AnnotatePCQDestroy);
 }
 
-void AnnotatePCQCreate(char *f, int l, uptr pcq) {
+void INTERFACE_ATTRIBUTE AnnotatePCQCreate(
+    char *f, int l, uptr pcq) {
   SCOPED_ANNOTATION(AnnotatePCQCreate);
 }
 
-void AnnotateExpectRace(char *f, int l, uptr mem, char *desc) {
+void INTERFACE_ATTRIBUTE AnnotateExpectRace(
+    char *f, int l, uptr mem, char *desc) {
   SCOPED_ANNOTATION(AnnotateExpectRace);
   Lock lock(&dyn_ann_ctx->mtx);
   AddExpectRace(&dyn_ann_ctx->expect,
@@ -288,7 +300,8 @@
   DPrintf("Add expected race: %s addr=%zx %s:%d\n", desc, mem, f, l);
 }
 
-static void BenignRaceImpl(char *f, int l, uptr mem, uptr size, char *desc) {
+static void BenignRaceImpl(
+    char *f, int l, uptr mem, uptr size, char *desc) {
   Lock lock(&dyn_ann_ctx->mtx);
   AddExpectRace(&dyn_ann_ctx->benign,
                 f, l, mem, size, desc);
@@ -296,69 +309,76 @@
 }
 
 // FIXME: Turn it off later. WTF is benign race?1?? Go talk to Hans Boehm.
-void AnnotateBenignRaceSized(char *f, int l, uptr mem, uptr size, char *desc) {
+void INTERFACE_ATTRIBUTE AnnotateBenignRaceSized(
+    char *f, int l, uptr mem, uptr size, char *desc) {
   SCOPED_ANNOTATION(AnnotateBenignRaceSized);
   BenignRaceImpl(f, l, mem, size, desc);
 }
 
-void AnnotateBenignRace(char *f, int l, uptr mem, char *desc) {
+void INTERFACE_ATTRIBUTE AnnotateBenignRace(
+    char *f, int l, uptr mem, char *desc) {
   SCOPED_ANNOTATION(AnnotateBenignRace);
   BenignRaceImpl(f, l, mem, 1, desc);
 }
 
-void AnnotateIgnoreReadsBegin(char *f, int l) {
+void INTERFACE_ATTRIBUTE AnnotateIgnoreReadsBegin(char *f, int l) {
   SCOPED_ANNOTATION(AnnotateIgnoreReadsBegin);
   IgnoreCtl(cur_thread(), false, true);
 }
 
-void AnnotateIgnoreReadsEnd(char *f, int l) {
+void INTERFACE_ATTRIBUTE AnnotateIgnoreReadsEnd(char *f, int l) {
   SCOPED_ANNOTATION(AnnotateIgnoreReadsEnd);
   IgnoreCtl(cur_thread(), false, false);
 }
 
-void AnnotateIgnoreWritesBegin(char *f, int l) {
+void INTERFACE_ATTRIBUTE AnnotateIgnoreWritesBegin(char *f, int l) {
   SCOPED_ANNOTATION(AnnotateIgnoreWritesBegin);
   IgnoreCtl(cur_thread(), true, true);
 }
 
-void AnnotateIgnoreWritesEnd(char *f, int l) {
+void INTERFACE_ATTRIBUTE AnnotateIgnoreWritesEnd(char *f, int l) {
   SCOPED_ANNOTATION(AnnotateIgnoreWritesEnd);
-  IgnoreCtl(cur_thread(), true, false);
+  IgnoreCtl(thr, true, false);
 }
 
-void AnnotatePublishMemoryRange(char *f, int l, uptr addr, uptr size) {
+void INTERFACE_ATTRIBUTE AnnotatePublishMemoryRange(
+    char *f, int l, uptr addr, uptr size) {
   SCOPED_ANNOTATION(AnnotatePublishMemoryRange);
 }
 
-void AnnotateUnpublishMemoryRange(char *f, int l, uptr addr, uptr size) {
+void INTERFACE_ATTRIBUTE AnnotateUnpublishMemoryRange(
+    char *f, int l, uptr addr, uptr size) {
   SCOPED_ANNOTATION(AnnotateUnpublishMemoryRange);
 }
 
-void AnnotateThreadName(char *f, int l, char *name) {
+void INTERFACE_ATTRIBUTE AnnotateThreadName(
+    char *f, int l, char *name) {
   SCOPED_ANNOTATION(AnnotateThreadName);
+  ThreadSetName(thr, name);
 }
 
-void WTFAnnotateHappensBefore(char *f, int l, uptr addr) {
+void INTERFACE_ATTRIBUTE WTFAnnotateHappensBefore(char *f, int l, uptr addr) {
   SCOPED_ANNOTATION(AnnotateHappensBefore);
 }
 
-void WTFAnnotateHappensAfter(char *f, int l, uptr addr) {
+void INTERFACE_ATTRIBUTE WTFAnnotateHappensAfter(char *f, int l, uptr addr) {
   SCOPED_ANNOTATION(AnnotateHappensAfter);
 }
 
-void WTFAnnotateBenignRaceSized(char *f, int l, uptr mem, uptr sz, char *desc) {
+void INTERFACE_ATTRIBUTE WTFAnnotateBenignRaceSized(
+    char *f, int l, uptr mem, uptr sz, char *desc) {
   SCOPED_ANNOTATION(AnnotateBenignRaceSized);
 }
 
-int RunningOnValgrind() {
+int INTERFACE_ATTRIBUTE RunningOnValgrind() {
   return flags()->running_on_valgrind;
 }
 
-double __attribute__((weak)) ValgrindSlowdown(void) {
+double __attribute__((weak)) INTERFACE_ATTRIBUTE ValgrindSlowdown(void) {
   return 10.0;
 }
 
-const char *ThreadSanitizerQuery(const char *query) {
+const char INTERFACE_ATTRIBUTE* ThreadSanitizerQuery(const char *query) {
   if (internal_strcmp(query, "pure_happens_before") == 0)
     return "1";
   else
diff --git a/lib/tsan/rtl/tsan_interface_ann.h b/lib/tsan/rtl/tsan_interface_ann.h
index 09e807a..ed80907 100644
--- a/lib/tsan/rtl/tsan_interface_ann.h
+++ b/lib/tsan/rtl/tsan_interface_ann.h
@@ -14,6 +14,8 @@
 #ifndef TSAN_INTERFACE_ANN_H
 #define TSAN_INTERFACE_ANN_H
 
+#include <sanitizer/common_interface_defs.h>
+
 // This header should NOT include any other headers.
 // All functions in this header are extern "C" and start with __tsan_.
 
@@ -21,8 +23,8 @@
 extern "C" {
 #endif
 
-void __tsan_acquire(void *addr);
-void __tsan_release(void *addr);
+void __tsan_acquire(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
+void __tsan_release(void *addr) SANITIZER_INTERFACE_ATTRIBUTE;
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/lib/tsan/rtl/tsan_interface_atomic.cc b/lib/tsan/rtl/tsan_interface_atomic.cc
index 83b5d25..a9d75e5 100644
--- a/lib/tsan/rtl/tsan_interface_atomic.cc
+++ b/lib/tsan/rtl/tsan_interface_atomic.cc
@@ -11,6 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+// ThreadSanitizer atomic operations are based on C++11/C1x standards.
+// For background see C++11 standard.  A slightly older, publicly
+// available draft of the standard (not entirely up-to-date, but close enough
+// for casual browsing) is available here:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
+// The following page contains more background information:
+// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
+
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "tsan_interface_atomic.h"
 #include "tsan_flags.h"
@@ -39,12 +47,13 @@
 typedef __tsan_atomic16 a16;
 typedef __tsan_atomic32 a32;
 typedef __tsan_atomic64 a64;
-const int mo_relaxed = __tsan_memory_order_relaxed;
-const int mo_consume = __tsan_memory_order_consume;
-const int mo_acquire = __tsan_memory_order_acquire;
-const int mo_release = __tsan_memory_order_release;
-const int mo_acq_rel = __tsan_memory_order_acq_rel;
-const int mo_seq_cst = __tsan_memory_order_seq_cst;
+typedef __tsan_atomic128 a128;
+const morder mo_relaxed = __tsan_memory_order_relaxed;
+const morder mo_consume = __tsan_memory_order_consume;
+const morder mo_acquire = __tsan_memory_order_acquire;
+const morder mo_release = __tsan_memory_order_release;
+const morder mo_acq_rel = __tsan_memory_order_acq_rel;
+const morder mo_seq_cst = __tsan_memory_order_seq_cst;
 
 static void AtomicStatInc(ThreadState *thr, uptr size, morder mo, StatType t) {
   StatInc(thr, StatAtomic);
@@ -52,7 +61,8 @@
   StatInc(thr, size == 1 ? StatAtomic1
              : size == 2 ? StatAtomic2
              : size == 4 ? StatAtomic4
-             :             StatAtomic8);
+             : size == 8 ? StatAtomic8
+             :             StatAtomic16);
   StatInc(thr, mo == mo_relaxed ? StatAtomicRelaxed
              : mo == mo_consume ? StatAtomicConsume
              : mo == mo_acquire ? StatAtomicAcquire
@@ -79,10 +89,134 @@
       || mo == mo_acq_rel || mo == mo_seq_cst;
 }
 
+static bool IsAcqRelOrder(morder mo) {
+  return mo == mo_acq_rel || mo == mo_seq_cst;
+}
+
+static morder ConvertOrder(morder mo) {
+  if (mo > (morder)100500) {
+    mo = morder(mo - 100500);
+    if (mo ==  morder(1 << 0))
+      mo = mo_relaxed;
+    else if (mo == morder(1 << 1))
+      mo = mo_consume;
+    else if (mo == morder(1 << 2))
+      mo = mo_acquire;
+    else if (mo == morder(1 << 3))
+      mo = mo_release;
+    else if (mo == morder(1 << 4))
+      mo = mo_acq_rel;
+    else if (mo == morder(1 << 5))
+      mo = mo_seq_cst;
+  }
+  CHECK_GE(mo, mo_relaxed);
+  CHECK_LE(mo, mo_seq_cst);
+  return mo;
+}
+
+template<typename T> T func_xchg(volatile T *v, T op) {
+  T res = __sync_lock_test_and_set(v, op);
+  // __sync_lock_test_and_set does not contain full barrier.
+  __sync_synchronize();
+  return res;
+}
+
+template<typename T> T func_add(volatile T *v, T op) {
+  return __sync_fetch_and_add(v, op);
+}
+
+template<typename T> T func_sub(volatile T *v, T op) {
+  return __sync_fetch_and_sub(v, op);
+}
+
+template<typename T> T func_and(volatile T *v, T op) {
+  return __sync_fetch_and_and(v, op);
+}
+
+template<typename T> T func_or(volatile T *v, T op) {
+  return __sync_fetch_and_or(v, op);
+}
+
+template<typename T> T func_xor(volatile T *v, T op) {
+  return __sync_fetch_and_xor(v, op);
+}
+
+template<typename T> T func_nand(volatile T *v, T op) {
+  // clang does not support __sync_fetch_and_nand.
+  T cmp = *v;
+  for (;;) {
+    T newv = ~(cmp & op);
+    T cur = __sync_val_compare_and_swap(v, cmp, newv);
+    if (cmp == cur)
+      return cmp;
+    cmp = cur;
+  }
+}
+
+template<typename T> T func_cas(volatile T *v, T cmp, T xch) {
+  return __sync_val_compare_and_swap(v, cmp, xch);
+}
+
+// clang does not support 128-bit atomic ops.
+// Atomic ops are executed under tsan internal mutex,
+// here we assume that the atomic variables are not accessed
+// from non-instrumented code.
+#ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
+a128 func_xchg(volatile a128 *v, a128 op) {
+  a128 cmp = *v;
+  *v = op;
+  return cmp;
+}
+
+a128 func_add(volatile a128 *v, a128 op) {
+  a128 cmp = *v;
+  *v = cmp + op;
+  return cmp;
+}
+
+a128 func_sub(volatile a128 *v, a128 op) {
+  a128 cmp = *v;
+  *v = cmp - op;
+  return cmp;
+}
+
+a128 func_and(volatile a128 *v, a128 op) {
+  a128 cmp = *v;
+  *v = cmp & op;
+  return cmp;
+}
+
+a128 func_or(volatile a128 *v, a128 op) {
+  a128 cmp = *v;
+  *v = cmp | op;
+  return cmp;
+}
+
+a128 func_xor(volatile a128 *v, a128 op) {
+  a128 cmp = *v;
+  *v = cmp ^ op;
+  return cmp;
+}
+
+a128 func_nand(volatile a128 *v, a128 op) {
+  a128 cmp = *v;
+  *v = ~(cmp & op);
+  return cmp;
+}
+
+a128 func_cas(volatile a128 *v, a128 cmp, a128 xch) {
+  a128 cur = *v;
+  if (cur == cmp)
+    *v = xch;
+  return cur;
+}
+#endif
+
 #define SCOPED_ATOMIC(func, ...) \
-    if ((u32)mo > 100500) mo = (morder)((u32)mo - 100500); \
+    mo = ConvertOrder(mo); \
     mo = flags()->force_seq_cst_atomics ? (morder)mo_seq_cst : mo; \
     ThreadState *const thr = cur_thread(); \
+    ProcessPendingSignals(thr); \
     const uptr pc = (uptr)__builtin_return_address(0); \
     AtomicStatInc(thr, sizeof(*a), mo, StatAtomic##func); \
     ScopedAtomic sa(thr, pc, __FUNCTION__); \
@@ -93,9 +227,16 @@
 static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a,
     morder mo) {
   CHECK(IsLoadOrder(mo));
+  // This fast-path is critical for performance.
+  // Assume the access is atomic.
+  if (!IsAcquireOrder(mo) && sizeof(T) <= sizeof(a))
+    return *a;
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, false);
+  thr->clock.set(thr->tid, thr->fast_state.epoch());
+  thr->clock.acquire(&s->clock);
   T v = *a;
-  if (IsAcquireOrder(mo))
-    Acquire(thr, pc, (uptr)a);
+  s->mtx.ReadUnlock();
+  __sync_synchronize();
   return v;
 }
 
@@ -103,93 +244,112 @@
 static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v,
     morder mo) {
   CHECK(IsStoreOrder(mo));
-  if (IsReleaseOrder(mo))
-    ReleaseStore(thr, pc, (uptr)a);
+  // This fast-path is critical for performance.
+  // Assume the access is atomic.
+  // Strictly speaking, even a relaxed store cuts off the release sequence,
+  // so it must reset the clock.
+  if (!IsReleaseOrder(mo) && sizeof(T) <= sizeof(a)) {
+    *a = v;
+    return;
+  }
+  __sync_synchronize();
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+  thr->clock.set(thr->tid, thr->fast_state.epoch());
+  thr->clock.ReleaseStore(&s->clock);
   *a = v;
+  s->mtx.Unlock();
+  // Trailing memory barrier to provide sequential consistency
+  // for Dekker-like store-load synchronization.
+  __sync_synchronize();
+}
+
+template<typename T, T (*F)(volatile T *v, T op)>
+static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+  thr->clock.set(thr->tid, thr->fast_state.epoch());
+  if (IsAcqRelOrder(mo))
+    thr->clock.acq_rel(&s->clock);
+  else if (IsReleaseOrder(mo))
+    thr->clock.release(&s->clock);
+  else if (IsAcquireOrder(mo))
+    thr->clock.acquire(&s->clock);
+  v = F(a, v);
+  s->mtx.Unlock();
+  return v;
 }
 
 template<typename T>
 static T AtomicExchange(ThreadState *thr, uptr pc, volatile T *a, T v,
     morder mo) {
-  if (IsReleaseOrder(mo))
-    Release(thr, pc, (uptr)a);
-  v = __sync_lock_test_and_set(a, v);
-  if (IsAcquireOrder(mo))
-    Acquire(thr, pc, (uptr)a);
-  return v;
+  return AtomicRMW<T, func_xchg>(thr, pc, a, v, mo);
 }
 
 template<typename T>
 static T AtomicFetchAdd(ThreadState *thr, uptr pc, volatile T *a, T v,
     morder mo) {
-  if (IsReleaseOrder(mo))
-    Release(thr, pc, (uptr)a);
-  v = __sync_fetch_and_add(a, v);
-  if (IsAcquireOrder(mo))
-    Acquire(thr, pc, (uptr)a);
-  return v;
+  return AtomicRMW<T, func_add>(thr, pc, a, v, mo);
 }
 
 template<typename T>
 static T AtomicFetchSub(ThreadState *thr, uptr pc, volatile T *a, T v,
     morder mo) {
-  if (IsReleaseOrder(mo))
-    Release(thr, pc, (uptr)a);
-  v = __sync_fetch_and_sub(a, v);
-  if (IsAcquireOrder(mo))
-    Acquire(thr, pc, (uptr)a);
-  return v;
+  return AtomicRMW<T, func_sub>(thr, pc, a, v, mo);
 }
 
 template<typename T>
 static T AtomicFetchAnd(ThreadState *thr, uptr pc, volatile T *a, T v,
     morder mo) {
-  if (IsReleaseOrder(mo))
-    Release(thr, pc, (uptr)a);
-  v = __sync_fetch_and_and(a, v);
-  if (IsAcquireOrder(mo))
-    Acquire(thr, pc, (uptr)a);
-  return v;
+  return AtomicRMW<T, func_and>(thr, pc, a, v, mo);
 }
 
 template<typename T>
 static T AtomicFetchOr(ThreadState *thr, uptr pc, volatile T *a, T v,
     morder mo) {
-  if (IsReleaseOrder(mo))
-    Release(thr, pc, (uptr)a);
-  v = __sync_fetch_and_or(a, v);
-  if (IsAcquireOrder(mo))
-    Acquire(thr, pc, (uptr)a);
-  return v;
+  return AtomicRMW<T, func_or>(thr, pc, a, v, mo);
 }
 
 template<typename T>
 static T AtomicFetchXor(ThreadState *thr, uptr pc, volatile T *a, T v,
     morder mo) {
-  if (IsReleaseOrder(mo))
-    Release(thr, pc, (uptr)a);
-  v = __sync_fetch_and_xor(a, v);
-  if (IsAcquireOrder(mo))
-    Acquire(thr, pc, (uptr)a);
-  return v;
+  return AtomicRMW<T, func_xor>(thr, pc, a, v, mo);
+}
+
+template<typename T>
+static T AtomicFetchNand(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  return AtomicRMW<T, func_nand>(thr, pc, a, v, mo);
 }
 
 template<typename T>
 static bool AtomicCAS(ThreadState *thr, uptr pc,
-    volatile T *a, T *c, T v, morder mo) {
-  if (IsReleaseOrder(mo))
-    Release(thr, pc, (uptr)a);
+    volatile T *a, T *c, T v, morder mo, morder fmo) {
+  (void)fmo;  // Unused because llvm does not pass it yet.
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+  thr->clock.set(thr->tid, thr->fast_state.epoch());
+  if (IsAcqRelOrder(mo))
+    thr->clock.acq_rel(&s->clock);
+  else if (IsReleaseOrder(mo))
+    thr->clock.release(&s->clock);
+  else if (IsAcquireOrder(mo))
+    thr->clock.acquire(&s->clock);
   T cc = *c;
-  T pr = __sync_val_compare_and_swap(a, cc, v);
-  if (IsAcquireOrder(mo))
-    Acquire(thr, pc, (uptr)a);
+  T pr = func_cas(a, cc, v);
+  s->mtx.Unlock();
   if (pr == cc)
     return true;
   *c = pr;
   return false;
 }
 
+template<typename T>
+static T AtomicCAS(ThreadState *thr, uptr pc,
+    volatile T *a, T c, T v, morder mo, morder fmo) {
+  AtomicCAS(thr, pc, a, &c, v, mo, fmo);
+  return c;
+}
+
 static void AtomicFence(ThreadState *thr, uptr pc, morder mo) {
+  // FIXME(dvyukov): not implemented.
   __sync_synchronize();
 }
 
@@ -209,6 +369,12 @@
   SCOPED_ATOMIC(Load, a, mo);
 }
 
+#if __TSAN_HAS_INT128
+a128 __tsan_atomic128_load(const volatile a128 *a, morder mo) {
+  SCOPED_ATOMIC(Load, a, mo);
+}
+#endif
+
 void __tsan_atomic8_store(volatile a8 *a, a8 v, morder mo) {
   SCOPED_ATOMIC(Store, a, v, mo);
 }
@@ -225,6 +391,12 @@
   SCOPED_ATOMIC(Store, a, v, mo);
 }
 
+#if __TSAN_HAS_INT128
+void __tsan_atomic128_store(volatile a128 *a, a128 v, morder mo) {
+  SCOPED_ATOMIC(Store, a, v, mo);
+}
+#endif
+
 a8 __tsan_atomic8_exchange(volatile a8 *a, a8 v, morder mo) {
   SCOPED_ATOMIC(Exchange, a, v, mo);
 }
@@ -241,6 +413,12 @@
   SCOPED_ATOMIC(Exchange, a, v, mo);
 }
 
+#if __TSAN_HAS_INT128
+a128 __tsan_atomic128_exchange(volatile a128 *a, a128 v, morder mo) {
+  SCOPED_ATOMIC(Exchange, a, v, mo);
+}
+#endif
+
 a8 __tsan_atomic8_fetch_add(volatile a8 *a, a8 v, morder mo) {
   SCOPED_ATOMIC(FetchAdd, a, v, mo);
 }
@@ -257,6 +435,12 @@
   SCOPED_ATOMIC(FetchAdd, a, v, mo);
 }
 
+#if __TSAN_HAS_INT128
+a128 __tsan_atomic128_fetch_add(volatile a128 *a, a128 v, morder mo) {
+  SCOPED_ATOMIC(FetchAdd, a, v, mo);
+}
+#endif
+
 a8 __tsan_atomic8_fetch_sub(volatile a8 *a, a8 v, morder mo) {
   SCOPED_ATOMIC(FetchSub, a, v, mo);
 }
@@ -273,6 +457,12 @@
   SCOPED_ATOMIC(FetchSub, a, v, mo);
 }
 
+#if __TSAN_HAS_INT128
+a128 __tsan_atomic128_fetch_sub(volatile a128 *a, a128 v, morder mo) {
+  SCOPED_ATOMIC(FetchSub, a, v, mo);
+}
+#endif
+
 a8 __tsan_atomic8_fetch_and(volatile a8 *a, a8 v, morder mo) {
   SCOPED_ATOMIC(FetchAnd, a, v, mo);
 }
@@ -289,6 +479,12 @@
   SCOPED_ATOMIC(FetchAnd, a, v, mo);
 }
 
+#if __TSAN_HAS_INT128
+a128 __tsan_atomic128_fetch_and(volatile a128 *a, a128 v, morder mo) {
+  SCOPED_ATOMIC(FetchAnd, a, v, mo);
+}
+#endif
+
 a8 __tsan_atomic8_fetch_or(volatile a8 *a, a8 v, morder mo) {
   SCOPED_ATOMIC(FetchOr, a, v, mo);
 }
@@ -305,6 +501,12 @@
   SCOPED_ATOMIC(FetchOr, a, v, mo);
 }
 
+#if __TSAN_HAS_INT128
+a128 __tsan_atomic128_fetch_or(volatile a128 *a, a128 v, morder mo) {
+  SCOPED_ATOMIC(FetchOr, a, v, mo);
+}
+#endif
+
 a8 __tsan_atomic8_fetch_xor(volatile a8 *a, a8 v, morder mo) {
   SCOPED_ATOMIC(FetchXor, a, v, mo);
 }
@@ -321,46 +523,114 @@
   SCOPED_ATOMIC(FetchXor, a, v, mo);
 }
 
+#if __TSAN_HAS_INT128
+a128 __tsan_atomic128_fetch_xor(volatile a128 *a, a128 v, morder mo) {
+  SCOPED_ATOMIC(FetchXor, a, v, mo);
+}
+#endif
+
+a8 __tsan_atomic8_fetch_nand(volatile a8 *a, a8 v, morder mo) {
+  SCOPED_ATOMIC(FetchNand, a, v, mo);
+}
+
+a16 __tsan_atomic16_fetch_nand(volatile a16 *a, a16 v, morder mo) {
+  SCOPED_ATOMIC(FetchNand, a, v, mo);
+}
+
+a32 __tsan_atomic32_fetch_nand(volatile a32 *a, a32 v, morder mo) {
+  SCOPED_ATOMIC(FetchNand, a, v, mo);
+}
+
+a64 __tsan_atomic64_fetch_nand(volatile a64 *a, a64 v, morder mo) {
+  SCOPED_ATOMIC(FetchNand, a, v, mo);
+}
+
+#if __TSAN_HAS_INT128
+a128 __tsan_atomic128_fetch_nand(volatile a128 *a, a128 v, morder mo) {
+  SCOPED_ATOMIC(FetchNand, a, v, mo);
+}
+#endif
+
 int __tsan_atomic8_compare_exchange_strong(volatile a8 *a, a8 *c, a8 v,
-    morder mo) {
-  SCOPED_ATOMIC(CAS, a, c, v, mo);
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
 }
 
 int __tsan_atomic16_compare_exchange_strong(volatile a16 *a, a16 *c, a16 v,
-    morder mo) {
-  SCOPED_ATOMIC(CAS, a, c, v, mo);
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
 }
 
 int __tsan_atomic32_compare_exchange_strong(volatile a32 *a, a32 *c, a32 v,
-    morder mo) {
-  SCOPED_ATOMIC(CAS, a, c, v, mo);
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
 }
 
 int __tsan_atomic64_compare_exchange_strong(volatile a64 *a, a64 *c, a64 v,
-    morder mo) {
-  SCOPED_ATOMIC(CAS, a, c, v, mo);
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
 }
 
+#if __TSAN_HAS_INT128
+int __tsan_atomic128_compare_exchange_strong(volatile a128 *a, a128 *c, a128 v,
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+}
+#endif
+
 int __tsan_atomic8_compare_exchange_weak(volatile a8 *a, a8 *c, a8 v,
-    morder mo) {
-  SCOPED_ATOMIC(CAS, a, c, v, mo);
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
 }
 
 int __tsan_atomic16_compare_exchange_weak(volatile a16 *a, a16 *c, a16 v,
-    morder mo) {
-  SCOPED_ATOMIC(CAS, a, c, v, mo);
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
 }
 
 int __tsan_atomic32_compare_exchange_weak(volatile a32 *a, a32 *c, a32 v,
-    morder mo) {
-  SCOPED_ATOMIC(CAS, a, c, v, mo);
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
 }
 
 int __tsan_atomic64_compare_exchange_weak(volatile a64 *a, a64 *c, a64 v,
-    morder mo) {
-  SCOPED_ATOMIC(CAS, a, c, v, mo);
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
 }
 
+#if __TSAN_HAS_INT128
+int __tsan_atomic128_compare_exchange_weak(volatile a128 *a, a128 *c, a128 v,
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+}
+#endif
+
+a8 __tsan_atomic8_compare_exchange_val(volatile a8 *a, a8 c, a8 v,
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+}
+a16 __tsan_atomic16_compare_exchange_val(volatile a16 *a, a16 c, a16 v,
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+}
+
+a32 __tsan_atomic32_compare_exchange_val(volatile a32 *a, a32 c, a32 v,
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+}
+
+a64 __tsan_atomic64_compare_exchange_val(volatile a64 *a, a64 c, a64 v,
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+}
+
+#if __TSAN_HAS_INT128
+a128 __tsan_atomic128_compare_exchange_val(volatile a128 *a, a128 c, a128 v,
+    morder mo, morder fmo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+}
+#endif
+
 void __tsan_atomic_thread_fence(morder mo) {
   char* a;
   SCOPED_ATOMIC(Fence, mo);
diff --git a/lib/tsan/rtl/tsan_interface_atomic.h b/lib/tsan/rtl/tsan_interface_atomic.h
index 2fa0021..c304fcc 100644
--- a/lib/tsan/rtl/tsan_interface_atomic.h
+++ b/lib/tsan/rtl/tsan_interface_atomic.h
@@ -13,121 +13,193 @@
 #ifndef TSAN_INTERFACE_ATOMIC_H
 #define TSAN_INTERFACE_ATOMIC_H
 
+#ifndef INTERFACE_ATTRIBUTE
+# define INTERFACE_ATTRIBUTE __attribute__((visibility("default")))
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-typedef char  __tsan_atomic8;
-typedef short __tsan_atomic16;  // NOLINT
-typedef int   __tsan_atomic32;
-typedef long  __tsan_atomic64;  // NOLINT
+typedef char     __tsan_atomic8;
+typedef short    __tsan_atomic16;  // NOLINT
+typedef int      __tsan_atomic32;
+typedef long     __tsan_atomic64;  // NOLINT
+
+#if defined(__SIZEOF_INT128__) \
+    || (__clang_major__ * 100 + __clang_minor__ >= 302)
+typedef __int128 __tsan_atomic128;
+#define __TSAN_HAS_INT128 1
+#else
+typedef char     __tsan_atomic128;
+#define __TSAN_HAS_INT128 0
+#endif
 
 // Part of ABI, do not change.
 // http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/atomic?view=markup
 typedef enum {
-  __tsan_memory_order_relaxed = 1 << 0,
-  __tsan_memory_order_consume = 1 << 1,
-  __tsan_memory_order_acquire = 1 << 2,
-  __tsan_memory_order_release = 1 << 3,
-  __tsan_memory_order_acq_rel = 1 << 4,
-  __tsan_memory_order_seq_cst = 1 << 5
+  __tsan_memory_order_relaxed,
+  __tsan_memory_order_consume,
+  __tsan_memory_order_acquire,
+  __tsan_memory_order_release,
+  __tsan_memory_order_acq_rel,
+  __tsan_memory_order_seq_cst
 } __tsan_memory_order;
 
 __tsan_atomic8 __tsan_atomic8_load(const volatile __tsan_atomic8 *a,
-    __tsan_memory_order mo);
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic16 __tsan_atomic16_load(const volatile __tsan_atomic16 *a,
-    __tsan_memory_order mo);
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic32 __tsan_atomic32_load(const volatile __tsan_atomic32 *a,
-    __tsan_memory_order mo);
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic64 __tsan_atomic64_load(const volatile __tsan_atomic64 *a,
-    __tsan_memory_order mo);
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic128 __tsan_atomic128_load(const volatile __tsan_atomic128 *a,
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 
 void __tsan_atomic8_store(volatile __tsan_atomic8 *a, __tsan_atomic8 v,
-    __tsan_memory_order mo);
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 void __tsan_atomic16_store(volatile __tsan_atomic16 *a, __tsan_atomic16 v,
-    __tsan_memory_order mo);
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 void __tsan_atomic32_store(volatile __tsan_atomic32 *a, __tsan_atomic32 v,
-    __tsan_memory_order mo);
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 void __tsan_atomic64_store(volatile __tsan_atomic64 *a, __tsan_atomic64 v,
-    __tsan_memory_order mo);
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+void __tsan_atomic128_store(volatile __tsan_atomic128 *a, __tsan_atomic128 v,
+    __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 
 __tsan_atomic8 __tsan_atomic8_exchange(volatile __tsan_atomic8 *a,
-    __tsan_atomic8 v, __tsan_memory_order mo);
+    __tsan_atomic8 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic16 __tsan_atomic16_exchange(volatile __tsan_atomic16 *a,
-    __tsan_atomic16 v, __tsan_memory_order mo);
+    __tsan_atomic16 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic32 __tsan_atomic32_exchange(volatile __tsan_atomic32 *a,
-    __tsan_atomic32 v, __tsan_memory_order mo);
+    __tsan_atomic32 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic64 __tsan_atomic64_exchange(volatile __tsan_atomic64 *a,
-    __tsan_atomic64 v, __tsan_memory_order mo);
+    __tsan_atomic64 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic128 __tsan_atomic128_exchange(volatile __tsan_atomic128 *a,
+    __tsan_atomic128 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 
 __tsan_atomic8 __tsan_atomic8_fetch_add(volatile __tsan_atomic8 *a,
-    __tsan_atomic8 v, __tsan_memory_order mo);
+    __tsan_atomic8 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic16 __tsan_atomic16_fetch_add(volatile __tsan_atomic16 *a,
-    __tsan_atomic16 v, __tsan_memory_order mo);
+    __tsan_atomic16 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic32 __tsan_atomic32_fetch_add(volatile __tsan_atomic32 *a,
-    __tsan_atomic32 v, __tsan_memory_order mo);
+    __tsan_atomic32 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic64 __tsan_atomic64_fetch_add(volatile __tsan_atomic64 *a,
-    __tsan_atomic64 v, __tsan_memory_order mo);
+    __tsan_atomic64 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic128 __tsan_atomic128_fetch_add(volatile __tsan_atomic128 *a,
+    __tsan_atomic128 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 
 __tsan_atomic8 __tsan_atomic8_fetch_sub(volatile __tsan_atomic8 *a,
-    __tsan_atomic8 v, __tsan_memory_order mo);
+    __tsan_atomic8 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic16 __tsan_atomic16_fetch_sub(volatile __tsan_atomic16 *a,
-    __tsan_atomic16 v, __tsan_memory_order mo);
+    __tsan_atomic16 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic32 __tsan_atomic32_fetch_sub(volatile __tsan_atomic32 *a,
-    __tsan_atomic32 v, __tsan_memory_order mo);
+    __tsan_atomic32 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic64 __tsan_atomic64_fetch_sub(volatile __tsan_atomic64 *a,
-    __tsan_atomic64 v, __tsan_memory_order mo);
+    __tsan_atomic64 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic128 __tsan_atomic128_fetch_sub(volatile __tsan_atomic128 *a,
+    __tsan_atomic128 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 
 __tsan_atomic8 __tsan_atomic8_fetch_and(volatile __tsan_atomic8 *a,
-    __tsan_atomic8 v, __tsan_memory_order mo);
+    __tsan_atomic8 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic16 __tsan_atomic16_fetch_and(volatile __tsan_atomic16 *a,
-    __tsan_atomic16 v, __tsan_memory_order mo);
+    __tsan_atomic16 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic32 __tsan_atomic32_fetch_and(volatile __tsan_atomic32 *a,
-    __tsan_atomic32 v, __tsan_memory_order mo);
+    __tsan_atomic32 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic64 __tsan_atomic64_fetch_and(volatile __tsan_atomic64 *a,
-    __tsan_atomic64 v, __tsan_memory_order mo);
+    __tsan_atomic64 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic128 __tsan_atomic128_fetch_and(volatile __tsan_atomic128 *a,
+    __tsan_atomic128 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 
 __tsan_atomic8 __tsan_atomic8_fetch_or(volatile __tsan_atomic8 *a,
-    __tsan_atomic8 v, __tsan_memory_order mo);
+    __tsan_atomic8 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic16 __tsan_atomic16_fetch_or(volatile __tsan_atomic16 *a,
-    __tsan_atomic16 v, __tsan_memory_order mo);
+    __tsan_atomic16 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic32 __tsan_atomic32_fetch_or(volatile __tsan_atomic32 *a,
-    __tsan_atomic32 v, __tsan_memory_order mo);
+    __tsan_atomic32 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic64 __tsan_atomic64_fetch_or(volatile __tsan_atomic64 *a,
-    __tsan_atomic64 v, __tsan_memory_order mo);
+    __tsan_atomic64 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic128 __tsan_atomic128_fetch_or(volatile __tsan_atomic128 *a,
+    __tsan_atomic128 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 
 __tsan_atomic8 __tsan_atomic8_fetch_xor(volatile __tsan_atomic8 *a,
-    __tsan_atomic8 v, __tsan_memory_order mo);
+    __tsan_atomic8 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic16 __tsan_atomic16_fetch_xor(volatile __tsan_atomic16 *a,
-    __tsan_atomic16 v, __tsan_memory_order mo);
+    __tsan_atomic16 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic32 __tsan_atomic32_fetch_xor(volatile __tsan_atomic32 *a,
-    __tsan_atomic32 v, __tsan_memory_order mo);
+    __tsan_atomic32 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 __tsan_atomic64 __tsan_atomic64_fetch_xor(volatile __tsan_atomic64 *a,
-    __tsan_atomic64 v, __tsan_memory_order mo);
+    __tsan_atomic64 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic128 __tsan_atomic128_fetch_xor(volatile __tsan_atomic128 *a,
+    __tsan_atomic128 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+
+__tsan_atomic8 __tsan_atomic8_fetch_nand(volatile __tsan_atomic8 *a,
+    __tsan_atomic8 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic16 __tsan_atomic16_fetch_nand(volatile __tsan_atomic16 *a,
+    __tsan_atomic16 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic32 __tsan_atomic32_fetch_nand(volatile __tsan_atomic32 *a,
+    __tsan_atomic32 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic64 __tsan_atomic64_fetch_nand(volatile __tsan_atomic64 *a,
+    __tsan_atomic64 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic128 __tsan_atomic128_fetch_nand(volatile __tsan_atomic128 *a,
+    __tsan_atomic128 v, __tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 
 int __tsan_atomic8_compare_exchange_weak(volatile __tsan_atomic8 *a,
-    __tsan_atomic8 *c, __tsan_atomic8 v, __tsan_memory_order mo);
+    __tsan_atomic8 *c, __tsan_atomic8 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
 int __tsan_atomic16_compare_exchange_weak(volatile __tsan_atomic16 *a,
-    __tsan_atomic16 *c, __tsan_atomic16 v, __tsan_memory_order mo);
+    __tsan_atomic16 *c, __tsan_atomic16 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
 int __tsan_atomic32_compare_exchange_weak(volatile __tsan_atomic32 *a,
-    __tsan_atomic32 *c, __tsan_atomic32 v, __tsan_memory_order mo);
+    __tsan_atomic32 *c, __tsan_atomic32 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
 int __tsan_atomic64_compare_exchange_weak(volatile __tsan_atomic64 *a,
-    __tsan_atomic64 *c, __tsan_atomic64 v, __tsan_memory_order mo);
+    __tsan_atomic64 *c, __tsan_atomic64 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
+int __tsan_atomic128_compare_exchange_weak(volatile __tsan_atomic128 *a,
+    __tsan_atomic128 *c, __tsan_atomic128 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
 
 int __tsan_atomic8_compare_exchange_strong(volatile __tsan_atomic8 *a,
-    __tsan_atomic8 *c, __tsan_atomic8 v, __tsan_memory_order mo);
+    __tsan_atomic8 *c, __tsan_atomic8 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
 int __tsan_atomic16_compare_exchange_strong(volatile __tsan_atomic16 *a,
-    __tsan_atomic16 *c, __tsan_atomic16 v, __tsan_memory_order mo);
+    __tsan_atomic16 *c, __tsan_atomic16 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
 int __tsan_atomic32_compare_exchange_strong(volatile __tsan_atomic32 *a,
-    __tsan_atomic32 *c, __tsan_atomic32 v, __tsan_memory_order mo);
+    __tsan_atomic32 *c, __tsan_atomic32 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
 int __tsan_atomic64_compare_exchange_strong(volatile __tsan_atomic64 *a,
-    __tsan_atomic64 *c, __tsan_atomic64 v, __tsan_memory_order mo);
+    __tsan_atomic64 *c, __tsan_atomic64 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
+int __tsan_atomic128_compare_exchange_strong(volatile __tsan_atomic128 *a,
+    __tsan_atomic128 *c, __tsan_atomic128 v, __tsan_memory_order mo,
+    __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
 
-void __tsan_atomic_thread_fence(__tsan_memory_order mo);
-void __tsan_atomic_signal_fence(__tsan_memory_order mo);
+__tsan_atomic8 __tsan_atomic8_compare_exchange_val(
+    volatile __tsan_atomic8 *a, __tsan_atomic8 c, __tsan_atomic8 v,
+    __tsan_memory_order mo, __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic16 __tsan_atomic16_compare_exchange_val(
+    volatile __tsan_atomic16 *a, __tsan_atomic16 c, __tsan_atomic16 v,
+    __tsan_memory_order mo, __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic32 __tsan_atomic32_compare_exchange_val(
+    volatile __tsan_atomic32 *a, __tsan_atomic32 c, __tsan_atomic32 v,
+    __tsan_memory_order mo, __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic64 __tsan_atomic64_compare_exchange_val(
+    volatile __tsan_atomic64 *a, __tsan_atomic64 c, __tsan_atomic64 v,
+    __tsan_memory_order mo, __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
+__tsan_atomic128 __tsan_atomic128_compare_exchange_val(
+    volatile __tsan_atomic128 *a, __tsan_atomic128 c, __tsan_atomic128 v,
+    __tsan_memory_order mo, __tsan_memory_order fail_mo) INTERFACE_ATTRIBUTE;
+
+void __tsan_atomic_thread_fence(__tsan_memory_order mo) INTERFACE_ATTRIBUTE;
+void __tsan_atomic_signal_fence(__tsan_memory_order mo) INTERFACE_ATTRIBUTE;
 
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 
+#undef INTERFACE_ATTRIBUTE
+
 #endif  // #ifndef TSAN_INTERFACE_ATOMIC_H
diff --git a/lib/tsan/rtl/tsan_interface_inl.h b/lib/tsan/rtl/tsan_interface_inl.h
index 233f902..8a92155 100644
--- a/lib/tsan/rtl/tsan_interface_inl.h
+++ b/lib/tsan/rtl/tsan_interface_inl.h
@@ -63,3 +63,11 @@
 void __tsan_func_exit() {
   FuncExit(cur_thread());
 }
+
+void __tsan_read_range(void *addr, uptr size) {
+  MemoryAccessRange(cur_thread(), CALLERPC, (uptr)addr, size, false);
+}
+
+void __tsan_write_range(void *addr, uptr size) {
+  MemoryAccessRange(cur_thread(), CALLERPC, (uptr)addr, size, true);
+}
diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
index fcc3000..b7e3c76 100644
--- a/lib/tsan/rtl/tsan_mman.cc
+++ b/lib/tsan/rtl/tsan_mman.cc
@@ -60,8 +60,9 @@
   void *p = allocator()->Allocate(&thr->alloc_cache, sz, align);
   if (p == 0)
     return 0;
-  MBlock *b = (MBlock*)allocator()->GetMetaData(p);
+  MBlock *b = new(allocator()->GetMetaData(p)) MBlock;
   b->size = sz;
+  b->head = 0;
   b->alloc_tid = thr->unique_id;
   b->alloc_stack_id = CurrentStackId(thr, pc);
   if (CTX() && CTX()->initialized) {
@@ -92,6 +93,7 @@
   if (CTX() && CTX()->initialized && thr->in_rtl == 1) {
     MemoryRangeFreed(thr, pc, (uptr)p, b->size);
   }
+  b->~MBlock();
   allocator()->Deallocate(&thr->alloc_cache, p);
   SignalUnsafeCall(thr, pc);
 }
diff --git a/lib/tsan/rtl/tsan_mutex.cc b/lib/tsan/rtl/tsan_mutex.cc
index 1559ea6..ca9b108 100644
--- a/lib/tsan/rtl/tsan_mutex.cc
+++ b/lib/tsan/rtl/tsan_mutex.cc
@@ -30,12 +30,13 @@
   /*0 MutexTypeInvalid*/     {},
   /*1 MutexTypeTrace*/       {MutexTypeLeaf},
   /*2 MutexTypeThreads*/     {MutexTypeReport},
-  /*3 MutexTypeReport*/      {},
+  /*3 MutexTypeReport*/      {MutexTypeSyncTab, MutexTypeMBlock},
   /*4 MutexTypeSyncVar*/     {},
   /*5 MutexTypeSyncTab*/     {MutexTypeSyncVar},
   /*6 MutexTypeSlab*/        {MutexTypeLeaf},
   /*7 MutexTypeAnnotations*/ {},
   /*8 MutexTypeAtExit*/      {MutexTypeSyncTab},
+  /*9 MutexTypeMBlock*/      {MutexTypeSyncVar},
 };
 
 static bool CanLockAdj[MutexTypeCount][MutexTypeCount];
@@ -92,25 +93,25 @@
     }
   }
 #if 0
-  TsanPrintf("Can lock graph:\n");
+  Printf("Can lock graph:\n");
   for (int i = 0; i < N; i++) {
     for (int j = 0; j < N; j++) {
-      TsanPrintf("%d ", CanLockAdj[i][j]);
+      Printf("%d ", CanLockAdj[i][j]);
     }
-    TsanPrintf("\n");
+    Printf("\n");
   }
-  TsanPrintf("Can lock graph closure:\n");
+  Printf("Can lock graph closure:\n");
   for (int i = 0; i < N; i++) {
     for (int j = 0; j < N; j++) {
-      TsanPrintf("%d ", CanLockAdj2[i][j]);
+      Printf("%d ", CanLockAdj2[i][j]);
     }
-    TsanPrintf("\n");
+    Printf("\n");
   }
 #endif
   // Verify that the graph is acyclic.
   for (int i = 0; i < N; i++) {
     if (CanLockAdj2[i][i]) {
-      TsanPrintf("Mutex %d participates in a cycle\n", i);
+      Printf("Mutex %d participates in a cycle\n", i);
       Die();
     }
   }
@@ -121,7 +122,9 @@
 }
 
 void DeadlockDetector::Lock(MutexType t) {
-  // TsanPrintf("LOCK %d @%zu\n", t, seq_ + 1);
+  // Printf("LOCK %d @%zu\n", t, seq_ + 1);
+  CHECK_GT(t, MutexTypeInvalid);
+  CHECK_LT(t, MutexTypeCount);
   u64 max_seq = 0;
   u64 max_idx = MutexTypeInvalid;
   for (int i = 0; i != MutexTypeCount; i++) {
@@ -136,17 +139,17 @@
   locked_[t] = ++seq_;
   if (max_idx == MutexTypeInvalid)
     return;
-  // TsanPrintf("  last %d @%zu\n", max_idx, max_seq);
+  // Printf("  last %d @%zu\n", max_idx, max_seq);
   if (!CanLockAdj[max_idx][t]) {
-    TsanPrintf("ThreadSanitizer: internal deadlock detected\n");
-    TsanPrintf("ThreadSanitizer: can't lock %d while under %zu\n",
+    Printf("ThreadSanitizer: internal deadlock detected\n");
+    Printf("ThreadSanitizer: can't lock %d while under %zu\n",
                t, (uptr)max_idx);
     CHECK(0);
   }
 }
 
 void DeadlockDetector::Unlock(MutexType t) {
-  // TsanPrintf("UNLO %d @%zu #%zu\n", t, seq_, locked_[t]);
+  // Printf("UNLO %d @%zu #%zu\n", t, seq_, locked_[t]);
   CHECK(locked_[t]);
   locked_[t] = 0;
 }
diff --git a/lib/tsan/rtl/tsan_mutex.h b/lib/tsan/rtl/tsan_mutex.h
index d74bfd8..68b33a7 100644
--- a/lib/tsan/rtl/tsan_mutex.h
+++ b/lib/tsan/rtl/tsan_mutex.h
@@ -29,6 +29,7 @@
   MutexTypeSlab,
   MutexTypeAnnotations,
   MutexTypeAtExit,
+  MutexTypeMBlock,
 
   // This must be the last.
   MutexTypeCount
diff --git a/lib/tsan/rtl/tsan_mutexset.cc b/lib/tsan/rtl/tsan_mutexset.cc
new file mode 100644
index 0000000..2158777
--- /dev/null
+++ b/lib/tsan/rtl/tsan_mutexset.cc
@@ -0,0 +1,89 @@
+//===-- tsan_mutexset.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_mutexset.h"
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+const uptr MutexSet::kMaxSize;
+
+MutexSet::MutexSet() {
+  size_ = 0;
+  internal_memset(&descs_, 0, sizeof(descs_));
+}
+
+void MutexSet::Add(u64 id, bool write, u64 epoch) {
+  // Look up existing mutex with the same id.
+  for (uptr i = 0; i < size_; i++) {
+    if (descs_[i].id == id) {
+      descs_[i].count++;
+      descs_[i].epoch = epoch;
+      return;
+    }
+  }
+  // On overflow, find the oldest mutex and drop it.
+  if (size_ == kMaxSize) {
+    u64 minepoch = (u64)-1;
+    u64 mini = (u64)-1;
+    for (uptr i = 0; i < size_; i++) {
+      if (descs_[i].epoch < minepoch) {
+        minepoch = descs_[i].epoch;
+        mini = i;
+      }
+    }
+    RemovePos(mini);
+    CHECK_EQ(size_, kMaxSize - 1);
+  }
+  // Add new mutex descriptor.
+  descs_[size_].id = id;
+  descs_[size_].write = write;
+  descs_[size_].epoch = epoch;
+  descs_[size_].count = 1;
+  size_++;
+}
+
+void MutexSet::Del(u64 id, bool write) {
+  for (uptr i = 0; i < size_; i++) {
+    if (descs_[i].id == id) {
+      if (--descs_[i].count == 0)
+        RemovePos(i);
+      return;
+    }
+  }
+}
+
+void MutexSet::Remove(u64 id) {
+  for (uptr i = 0; i < size_; i++) {
+    if (descs_[i].id == id) {
+      RemovePos(i);
+      return;
+    }
+  }
+}
+
+void MutexSet::RemovePos(uptr i) {
+  CHECK_LT(i, size_);
+  descs_[i] = descs_[size_ - 1];
+  size_--;
+}
+
+uptr MutexSet::Size() const {
+  return size_;
+}
+
+MutexSet::Desc MutexSet::Get(uptr i) const {
+  CHECK_LT(i, size_);
+  return descs_[i];
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_mutexset.h b/lib/tsan/rtl/tsan_mutexset.h
new file mode 100644
index 0000000..09223ff
--- /dev/null
+++ b/lib/tsan/rtl/tsan_mutexset.h
@@ -0,0 +1,65 @@
+//===-- tsan_mutexset.h -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// MutexSet holds the set of mutexes currently held by a thread.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_MUTEXSET_H
+#define TSAN_MUTEXSET_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+class MutexSet {
+ public:
+  // Holds limited number of mutexes.
+  // The oldest mutexes are discarded on overflow.
+  static const uptr kMaxSize = 64;
+  struct Desc {
+    u64 id;
+    u64 epoch;
+    int count;
+    bool write;
+  };
+
+  MutexSet();
+  // The 'id' is obtained from SyncVar::GetId().
+  void Add(u64 id, bool write, u64 epoch);
+  void Del(u64 id, bool write);
+  void Remove(u64 id);  // Removes the mutex completely (if it's destroyed).
+  uptr Size() const;
+  Desc Get(uptr i) const;
+
+ private:
+#ifndef TSAN_GO
+  uptr size_;
+  Desc descs_[kMaxSize];
+#endif
+
+  void RemovePos(uptr i);
+};
+
+// Go does not have mutexes, so do not spend memory and time.
+// (Go sync.Mutex is actually a semaphore -- can be unlocked
+// in different goroutine).
+#ifdef TSAN_GO
+inline MutexSet::MutexSet() {}
+inline void MutexSet::Add(u64 id, bool write, u64 epoch) {}
+inline void MutexSet::Del(u64 id, bool write) {}
+inline void MutexSet::Remove(u64 id) {}
+inline void MutexSet::RemovePos(uptr i) {}
+inline uptr MutexSet::Size() const { return 0; }
+inline MutexSet::Desc MutexSet::Get(uptr i) const { return Desc(); }
+#endif
+
+}  // namespace __tsan
+
+#endif  // TSAN_MUTEXSET_H
diff --git a/lib/tsan/rtl/tsan_platform.h b/lib/tsan/rtl/tsan_platform.h
index b80b268..4b7abb5 100644
--- a/lib/tsan/rtl/tsan_platform.h
+++ b/lib/tsan/rtl/tsan_platform.h
@@ -12,18 +12,65 @@
 // Platform-specific code.
 //===----------------------------------------------------------------------===//
 
+/*
+C++ linux memory layout:
+0000 0000 0000 - 03c0 0000 0000: protected
+03c0 0000 0000 - 1000 0000 0000: shadow
+1000 0000 0000 - 6000 0000 0000: protected
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 7d00 0000 0000: -
+7d00 0000 0000 - 7e00 0000 0000: heap
+7e00 0000 0000 - 7fff ffff ffff: modules and main thread stack
+
+C++ COMPAT linux memory layout:
+0000 0000 0000 - 0400 0000 0000: protected
+0400 0000 0000 - 1000 0000 0000: shadow
+1000 0000 0000 - 2900 0000 0000: protected
+2900 0000 0000 - 2c00 0000 0000: modules
+2c00 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 7d00 0000 0000: -
+7d00 0000 0000 - 7e00 0000 0000: heap
+7e00 0000 0000 - 7f00 0000 0000: -
+7f00 0000 0000 - 7fff ffff ffff: main thread stack
+
+Go linux and darwin memory layout:
+0000 0000 0000 - 0000 1000 0000: executable
+0000 1000 0000 - 00f8 0000 0000: -
+00f8 0000 0000 - 0118 0000 0000: heap
+0118 0000 0000 - 1000 0000 0000: -
+1000 0000 0000 - 1460 0000 0000: shadow
+1460 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 7fff ffff ffff: -
+
+Go windows memory layout:
+0000 0000 0000 - 0000 1000 0000: executable
+0000 1000 0000 - 00f8 0000 0000: -
+00f8 0000 0000 - 0118 0000 0000: heap
+0118 0000 0000 - 0100 0000 0000: -
+0100 0000 0000 - 0560 0000 0000: shadow
+0560 0000 0000 - 0760 0000 0000: traces
+0760 0000 0000 - 07ff ffff ffff: -
+*/
+
 #ifndef TSAN_PLATFORM_H
 #define TSAN_PLATFORM_H
 
-#include "tsan_rtl.h"
+#include "tsan_defs.h"
+#include "tsan_trace.h"
 
-#if __LP64__
+#if defined(__LP64__) || defined(_WIN64)
 namespace __tsan {
 
 #if defined(TSAN_GO)
 static const uptr kLinuxAppMemBeg = 0x000000000000ULL;
 static const uptr kLinuxAppMemEnd = 0x00fcffffffffULL;
+# if defined(_WIN32)
+static const uptr kLinuxShadowMsk = 0x010000000000ULL;
+# else
 static const uptr kLinuxShadowMsk = 0x100000000000ULL;
+# endif
 // TSAN_COMPAT_SHADOW is intended for COMPAT virtual memory layout,
 // when memory addresses are of the 0x2axxxxxxxxxx form.
 // The option is enabled with 'setarch x86_64 -L'.
@@ -37,6 +84,13 @@
 
 static const uptr kLinuxAppMemMsk = 0x7c0000000000ULL;
 
+#if defined(_WIN32)
+const uptr kTraceMemBegin = 0x056000000000ULL;
+#else
+const uptr kTraceMemBegin = 0x600000000000ULL;
+#endif
+const uptr kTraceMemSize = 0x020000000000ULL;
+
 // This has to be a macro to allow constant initialization of constants below.
 #ifndef TSAN_GO
 #define MemToShadow(addr) \
@@ -48,7 +102,7 @@
 
 static const uptr kLinuxShadowBeg = MemToShadow(kLinuxAppMemBeg);
 static const uptr kLinuxShadowEnd =
-  MemToShadow(kLinuxAppMemEnd) | (kPageSize - 1);
+    MemToShadow(kLinuxAppMemEnd) | 0xff;
 
 static inline bool IsAppMem(uptr mem) {
   return mem >= kLinuxAppMemBeg && mem <= kLinuxAppMemEnd;
@@ -83,6 +137,12 @@
 
 const char *InitializePlatform();
 void FinalizePlatform();
+void MapThreadTrace(uptr addr, uptr size);
+uptr ALWAYS_INLINE INLINE GetThreadTrace(int tid) {
+  uptr p = kTraceMemBegin + (uptr)tid * kTraceSize * sizeof(Event);
+  DCHECK_LT(p, kTraceMemBegin + kTraceMemSize);
+  return p;
+}
 
 void internal_start_thread(void(*func)(void*), void *arg);
 
@@ -95,7 +155,7 @@
 
 }  // namespace __tsan
 
-#else  // __LP64__
+#else  // defined(__LP64__) || defined(_WIN64)
 # error "Only 64-bit is supported"
 #endif
 
diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
index eed5062..0a2ec3c 100644
--- a/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/lib/tsan/rtl/tsan_platform_linux.cc
@@ -83,46 +83,52 @@
   if (beg == end)
     return;
   if (beg != (uptr)Mprotect(beg, end - beg)) {
-    TsanPrintf("FATAL: ThreadSanitizer can not protect [%zx,%zx]\n", beg, end);
-    TsanPrintf("FATAL: Make sure you are not using unlimited stack\n");
+    Printf("FATAL: ThreadSanitizer can not protect [%zx,%zx]\n", beg, end);
+    Printf("FATAL: Make sure you are not using unlimited stack\n");
     Die();
   }
 }
 #endif
 
+#ifndef TSAN_GO
 void InitializeShadowMemory() {
   uptr shadow = (uptr)MmapFixedNoReserve(kLinuxShadowBeg,
     kLinuxShadowEnd - kLinuxShadowBeg);
   if (shadow != kLinuxShadowBeg) {
-    TsanPrintf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
-    TsanPrintf("FATAL: Make sure to compile with -fPIE and "
+    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
+    Printf("FATAL: Make sure to compile with -fPIE and "
                "to link with -pie (%p, %p).\n", shadow, kLinuxShadowBeg);
     Die();
   }
-#ifndef TSAN_GO
   const uptr kClosedLowBeg  = 0x200000;
   const uptr kClosedLowEnd  = kLinuxShadowBeg - 1;
   const uptr kClosedMidBeg = kLinuxShadowEnd + 1;
-  const uptr kClosedMidEnd = kLinuxAppMemBeg - 1;
+  const uptr kClosedMidEnd = min(kLinuxAppMemBeg, kTraceMemBegin);
   ProtectRange(kClosedLowBeg, kClosedLowEnd);
   ProtectRange(kClosedMidBeg, kClosedMidEnd);
-#endif
-#ifndef TSAN_GO
   DPrintf("kClosedLow   %zx-%zx (%zuGB)\n",
       kClosedLowBeg, kClosedLowEnd, (kClosedLowEnd - kClosedLowBeg) >> 30);
-#endif
   DPrintf("kLinuxShadow %zx-%zx (%zuGB)\n",
       kLinuxShadowBeg, kLinuxShadowEnd,
       (kLinuxShadowEnd - kLinuxShadowBeg) >> 30);
-#ifndef TSAN_GO
   DPrintf("kClosedMid   %zx-%zx (%zuGB)\n",
       kClosedMidBeg, kClosedMidEnd, (kClosedMidEnd - kClosedMidBeg) >> 30);
-#endif
   DPrintf("kLinuxAppMem %zx-%zx (%zuGB)\n",
       kLinuxAppMemBeg, kLinuxAppMemEnd,
       (kLinuxAppMemEnd - kLinuxAppMemBeg) >> 30);
   DPrintf("stack        %zx\n", (uptr)&shadow);
 }
+#endif
+
+void MapThreadTrace(uptr addr, uptr size) {
+  DPrintf("Mapping trace at %p-%p(0x%zx)\n", addr, addr + size, size);
+  CHECK_GE(addr, kTraceMemBegin);
+  CHECK_LE(addr + size, kTraceMemBegin + kTraceMemSize);
+  if (addr != (uptr)MmapFixedNoReserve(addr, size)) {
+    Printf("FATAL: ThreadSanitizer can not mmap thread trace\n");
+    Die();
+  }
+}
 
 static uptr g_data_start;
 static uptr g_data_end;
@@ -135,10 +141,10 @@
   if (proc_maps.Next(&start, &end,
                      /*offset*/0, /*filename*/0, /*filename_size*/0)) {
     if ((u64)start < kLinuxAppMemBeg) {
-      TsanPrintf("FATAL: ThreadSanitizer can not mmap the shadow memory ("
+      Printf("FATAL: ThreadSanitizer can not mmap the shadow memory ("
              "something is mapped at 0x%zx < 0x%zx)\n",
              start, kLinuxAppMemBeg);
-      TsanPrintf("FATAL: Make sure to compile with -fPIE"
+      Printf("FATAL: Make sure to compile with -fPIE"
              " and to link with -pie.\n");
       Die();
     }
@@ -196,34 +202,56 @@
 }
 #endif  // #ifndef TSAN_GO
 
+static rlim_t getlim(int res) {  // Returns the soft limit of resource res.
+  rlimit rlim;
+  CHECK_EQ(0, getrlimit(res, &rlim));  // getrlimit() must succeed.
+  return rlim.rlim_cur;  // rlim_cur is the current (soft) limit.
+}
+
+static void setlim(int res, rlim_t lim) {  // Sets soft and hard limit to lim.
+  // volatile keeps clang from replacing the stores below with memset.
+  volatile rlimit rlim;
+  rlim.rlim_cur = lim;
+  rlim.rlim_max = lim;
+  setrlimit(res, (rlimit*)&rlim);  // Return value is not checked.
+}
+
 const char *InitializePlatform() {
   void *p = 0;
   if (sizeof(p) == 8) {
     // Disable core dumps, dumping of 16TB usually takes a bit long.
-    // The following magic is to prevent clang from replacing it with memset.
-    volatile rlimit lim;
-    lim.rlim_cur = 0;
-    lim.rlim_max = 0;
-    setrlimit(RLIMIT_CORE, (rlimit*)&lim);
+    setlim(RLIMIT_CORE, 0);
   }
+  bool reexec = false;
   // TSan doesn't play well with unlimited stack size (as stack
   // overlaps with shadow memory). If we detect unlimited stack size,
   // we re-exec the program with limited stack size as a best effort.
-  if (StackSizeIsUnlimited()) {
-    const uptr kMaxStackSize = 32 * 1024 * 1024;  // 32 Mb
+  if (getlim(RLIMIT_STACK) == (rlim_t)-1) {
+    const uptr kMaxStackSize = 32 * 1024 * 1024;
     Report("WARNING: Program is run with unlimited stack size, which "
            "wouldn't work with ThreadSanitizer.\n");
     Report("Re-execing with stack size limited to %zd bytes.\n", kMaxStackSize);
     SetStackSizeLimitInBytes(kMaxStackSize);
-    ReExec();
+    reexec = true;
   }
 
+  if (getlim(RLIMIT_AS) != (rlim_t)-1) {
+    Report("WARNING: Program is run with limited virtual address space, which "
+           "wouldn't work with ThreadSanitizer.\n");
+    Report("Re-execing with unlimited virtual address space.\n");
+    setlim(RLIMIT_AS, -1);
+    reexec = true;
+  }
+
+  if (reexec)
+    ReExec();
+
 #ifndef TSAN_GO
   CheckPIE();
   g_tls_size = (uptr)InitTlsSize();
   InitDataSeg();
 #endif
-  return getenv("TSAN_OPTIONS");
+  return getenv(kTsanOptionsEnv);
 }
 
 void FinalizePlatform() {
diff --git a/lib/tsan/rtl/tsan_platform_mac.cc b/lib/tsan/rtl/tsan_platform_mac.cc
index 53aa659..183061d 100644
--- a/lib/tsan/rtl/tsan_platform_mac.cc
+++ b/lib/tsan/rtl/tsan_platform_mac.cc
@@ -52,13 +52,14 @@
 void FlushShadowMemory() {
 }
 
+#ifndef TSAN_GO
 void InitializeShadowMemory() {
   uptr shadow = (uptr)MmapFixedNoReserve(kLinuxShadowBeg,
     kLinuxShadowEnd - kLinuxShadowBeg);
   if (shadow != kLinuxShadowBeg) {
-    TsanPrintf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
-    TsanPrintf("FATAL: Make sure to compile with -fPIE and "
-               "to link with -pie.\n");
+    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
+    Printf("FATAL: Make sure to compile with -fPIE and "
+           "to link with -pie.\n");
     Die();
   }
   DPrintf("kLinuxShadow %zx-%zx (%zuGB)\n",
@@ -68,6 +69,7 @@
       kLinuxAppMemBeg, kLinuxAppMemEnd,
       (kLinuxAppMemEnd - kLinuxAppMemBeg) >> 30);
 }
+#endif
 
 const char *InitializePlatform() {
   void *p = 0;
@@ -80,7 +82,7 @@
     setrlimit(RLIMIT_CORE, (rlimit*)&lim);
   }
 
-  return getenv("TSAN_OPTIONS");
+  return getenv(kTsanOptionsEnv);
 }
 
 void FinalizePlatform() {
diff --git a/lib/tsan/rtl/tsan_platform_windows.cc b/lib/tsan/rtl/tsan_platform_windows.cc
new file mode 100644
index 0000000..f23e84e
--- /dev/null
+++ b/lib/tsan/rtl/tsan_platform_windows.cc
@@ -0,0 +1,58 @@
+//===-- tsan_platform_windows.cc ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Windows-specific code.
+//===----------------------------------------------------------------------===//
+
+#ifdef _WIN32
+
+#include "tsan_platform.h"
+
+#include <stdlib.h>
+
+namespace __tsan {
+
+ScopedInRtl::ScopedInRtl() {  // Intentionally empty in the Windows port.
+}
+
+ScopedInRtl::~ScopedInRtl() {  // Intentionally empty in the Windows port.
+}
+
+uptr GetShadowMemoryConsumption() {  // Not measured here.
+  return 0;  // Always reports zero.
+}
+
+void FlushShadowMemory() {  // No-op in the Windows port.
+}
+
+const char *InitializePlatform() {  // Only looks up the options variable.
+  return getenv(kTsanOptionsEnv);  // Null if the variable is not set.
+}
+
+void FinalizePlatform() {
+  fflush(0);  // Null stream: flush all open output streams.
+}
+
+uptr GetTlsSize() {  // Not computed here.
+  return 0;  // Always reports zero.
+}
+
+void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
+                          uptr *tls_addr, uptr *tls_size) {
+  *stk_addr = 0;  // All four outputs are zeroed: no stack or TLS bounds
+  *stk_size = 0;  // are reported here. The main argument is unused.
+  *tls_addr = 0;
+  *tls_size = 0;
+}
+
+}  // namespace __tsan
+
+#endif  // #ifdef _WIN32
diff --git a/lib/tsan/rtl/tsan_printf.cc b/lib/tsan/rtl/tsan_printf.cc
deleted file mode 100644
index cd6dcbf..0000000
--- a/lib/tsan/rtl/tsan_printf.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- tsan_printf.cc ----------------------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-//===----------------------------------------------------------------------===//
-
-#include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_libc.h"
-#include "tsan_defs.h"
-#include "tsan_mman.h"
-#include "tsan_platform.h"
-
-#include <stdarg.h>  // va_list
-
-namespace __sanitizer {
-int VSNPrintf(char *buff, int buff_length, const char *format, va_list args);
-}  // namespace __sanitizer
-
-namespace __tsan {
-
-void TsanPrintf(const char *format, ...) {
-  ScopedInRtl in_rtl;
-  const uptr kMaxLen = 16 * 1024;
-  InternalScopedBuffer<char> buffer(kMaxLen);
-  va_list args;
-  va_start(args, format);
-  uptr len = VSNPrintf(buffer.data(), buffer.size(), format, args);
-  va_end(args);
-  internal_write(CTX() ? flags()->log_fileno : 2,
-      buffer.data(), len < buffer.size() ? len : buffer.size() - 1);
-}
-
-}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_report.cc b/lib/tsan/rtl/tsan_report.cc
index f24a4ec..af8235a 100644
--- a/lib/tsan/rtl/tsan_report.cc
+++ b/lib/tsan/rtl/tsan_report.cc
@@ -25,104 +25,128 @@
     , sleep() {
 }
 
+ReportMop::ReportMop()
+    : mset(MBlockReportMutex) {  // Presumably mset's allocation tag; confirm.
+}
+
 ReportDesc::~ReportDesc() {
+  // FIXME(dvyukov): it must be leaking a lot of memory.
 }
 
 #ifndef TSAN_GO
 
 static void PrintHeader(ReportType typ) {
-  TsanPrintf("WARNING: ThreadSanitizer: ");
+  Printf("WARNING: ThreadSanitizer: ");
 
   if (typ == ReportTypeRace)
-    TsanPrintf("data race");
+    Printf("data race");
   else if (typ == ReportTypeUseAfterFree)
-    TsanPrintf("heap-use-after-free");
+    Printf("heap-use-after-free");
   else if (typ == ReportTypeThreadLeak)
-    TsanPrintf("thread leak");
+    Printf("thread leak");
   else if (typ == ReportTypeMutexDestroyLocked)
-    TsanPrintf("destroy of a locked mutex");
+    Printf("destroy of a locked mutex");
   else if (typ == ReportTypeSignalUnsafe)
-    TsanPrintf("signal-unsafe call inside of a signal");
+    Printf("signal-unsafe call inside of a signal");
   else if (typ == ReportTypeErrnoInSignal)
-    TsanPrintf("signal handler spoils errno");
+    Printf("signal handler spoils errno");
 
-  TsanPrintf(" (pid=%d)\n", GetPid());
+  Printf(" (pid=%d)\n", GetPid());
 }
 
 void PrintStack(const ReportStack *ent) {
-  for (int i = 0; ent; ent = ent->next, i++) {
-    TsanPrintf("    #%d %s %s:%d", i, ent->func, ent->file, ent->line);
-    if (ent->col)
-      TsanPrintf(":%d", ent->col);
-    if (ent->module && ent->offset)
-      TsanPrintf(" (%s+%p)\n", ent->module, (void*)ent->offset);
-    else
-      TsanPrintf(" (%p)\n", (void*)ent->pc);
+  if (ent == 0) {
+    Printf("    [failed to restore the stack]\n\n");
+    return;
   }
-  TsanPrintf("\n");
+  for (int i = 0; ent; ent = ent->next, i++) {
+    Printf("    #%d %s %s:%d", i, ent->func, ent->file, ent->line);
+    if (ent->col)
+      Printf(":%d", ent->col);
+    if (ent->module && ent->offset)
+      Printf(" (%s+%p)\n", ent->module, (void*)ent->offset);
+    else
+      Printf(" (%p)\n", (void*)ent->pc);
+  }
+  Printf("\n");
+}
+
+static void PrintMutexSet(Vector<ReportMopMutex> const& mset) {
+  for (uptr i = 0; i < mset.Size(); i++) {  // No output for an empty set.
+    if (i == 0)  // Open the " (mutexes: ...)" list before the first entry.
+      Printf(" (mutexes:");
+    const ReportMopMutex m = mset[i];
+    Printf(" %s M%llu", m.write ? "write" : "read", m.id);
+    Printf(i == mset.Size() - 1 ? ")" : ",");  // ")" after last, else ",".
+  }
+}
 
 static void PrintMop(const ReportMop *mop, bool first) {
-  TsanPrintf("  %s of size %d at %p",
+  Printf("  %s of size %d at %p",
       (first ? (mop->write ? "Write" : "Read")
              : (mop->write ? "Previous write" : "Previous read")),
       mop->size, (void*)mop->addr);
   if (mop->tid == 0)
-    TsanPrintf(" by main thread:\n");
+    Printf(" by main thread");
   else
-    TsanPrintf(" by thread %d:\n", mop->tid);
+    Printf(" by thread T%d", mop->tid);
+  PrintMutexSet(mop->mset);
+  Printf(":\n");
   PrintStack(mop->stack);
 }
 
 static void PrintLocation(const ReportLocation *loc) {
   if (loc->type == ReportLocationGlobal) {
-    TsanPrintf("  Location is global '%s' of size %zu at %zx %s:%d\n",
-               loc->name, loc->size, loc->addr, loc->file, loc->line);
+    Printf("  Location is global '%s' of size %zu at %zx %s:%d (%s+%p)\n\n",
+               loc->name, loc->size, loc->addr, loc->file, loc->line,
+               loc->module, loc->offset);
   } else if (loc->type == ReportLocationHeap) {
-    TsanPrintf("  Location is heap block of size %zu at %p allocated",
+    Printf("  Location is heap block of size %zu at %p allocated",
         loc->size, loc->addr);
     if (loc->tid == 0)
-      TsanPrintf(" by main thread:\n");
+      Printf(" by main thread:\n");
     else
-      TsanPrintf(" by thread %d:\n", loc->tid);
+      Printf(" by thread T%d:\n", loc->tid);
     PrintStack(loc->stack);
   } else if (loc->type == ReportLocationStack) {
-    TsanPrintf("  Location is stack of thread %d:\n", loc->tid);
+    Printf("  Location is stack of thread T%d:\n\n", loc->tid);
   }
 }
 
 static void PrintMutex(const ReportMutex *rm) {
-  if (rm->stack == 0)
-    return;
-  TsanPrintf("  Mutex %d created at:\n", rm->id);
-  PrintStack(rm->stack);
+  if (rm->destroyed) {
+    Printf("  Mutex M%llu is already destroyed.\n\n", rm->id);
+  } else {
+    Printf("  Mutex M%llu created at:\n", rm->id);
+    PrintStack(rm->stack);
+  }
 }
 
 static void PrintThread(const ReportThread *rt) {
   if (rt->id == 0)  // Little sense in describing the main thread.
     return;
-  TsanPrintf("  Thread %d", rt->id);
+  Printf("  Thread T%d", rt->id);
   if (rt->name)
-    TsanPrintf(" '%s'", rt->name);
-  TsanPrintf(" (tid=%zu, %s)", rt->pid, rt->running ? "running" : "finished");
+    Printf(" '%s'", rt->name);
+  Printf(" (tid=%zu, %s)", rt->pid, rt->running ? "running" : "finished");
   if (rt->stack)
-    TsanPrintf(" created at:");
-  TsanPrintf("\n");
+    Printf(" created at:");
+  Printf("\n");
   PrintStack(rt->stack);
 }
 
 static void PrintSleep(const ReportStack *s) {
-  TsanPrintf("  As if synchronized via sleep:\n");
+  Printf("  As if synchronized via sleep:\n");
   PrintStack(s);
 }
 
 void PrintReport(const ReportDesc *rep) {
-  TsanPrintf("==================\n");
+  Printf("==================\n");
   PrintHeader(rep->typ);
 
   for (uptr i = 0; i < rep->stacks.Size(); i++) {
     if (i)
-      TsanPrintf("  and:\n");
+      Printf("  and:\n");
     PrintStack(rep->stacks[i]);
   }
 
@@ -141,21 +165,25 @@
   for (uptr i = 0; i < rep->threads.Size(); i++)
     PrintThread(rep->threads[i]);
 
-  TsanPrintf("==================\n");
+  Printf("==================\n");
 }
 
 #else
 
 void PrintStack(const ReportStack *ent) {
+  if (ent == 0) {
+    Printf("  [failed to restore the stack]\n\n");
+    return;
+  }
   for (int i = 0; ent; ent = ent->next, i++) {
-    TsanPrintf("  %s()\n      %s:%d +0x%zx\n",
+    Printf("  %s()\n      %s:%d +0x%zx\n",
         ent->func, ent->file, ent->line, (void*)ent->offset);
   }
-  TsanPrintf("\n");
+  Printf("\n");
 }
 
 static void PrintMop(const ReportMop *mop, bool first) {
-  TsanPrintf("%s by goroutine %d:\n",
+  Printf("%s by goroutine %d:\n",
       (first ? (mop->write ? "Write" : "Read")
              : (mop->write ? "Previous write" : "Previous read")),
       mop->tid);
@@ -165,19 +193,19 @@
 static void PrintThread(const ReportThread *rt) {
   if (rt->id == 0)  // Little sense in describing the main thread.
     return;
-  TsanPrintf("Goroutine %d (%s) created at:\n",
+  Printf("Goroutine %d (%s) created at:\n",
     rt->id, rt->running ? "running" : "finished");
   PrintStack(rt->stack);
 }
 
 void PrintReport(const ReportDesc *rep) {
-  TsanPrintf("==================\n");
-  TsanPrintf("WARNING: DATA RACE\n");
+  Printf("==================\n");
+  Printf("WARNING: DATA RACE\n");
   for (uptr i = 0; i < rep->mops.Size(); i++)
     PrintMop(rep->mops[i], i == 0);
   for (uptr i = 0; i < rep->threads.Size(); i++)
     PrintThread(rep->threads[i]);
-  TsanPrintf("==================\n");
+  Printf("==================\n");
 }
 
 #endif
diff --git a/lib/tsan/rtl/tsan_report.h b/lib/tsan/rtl/tsan_report.h
index 34dc88f..2c3667e 100644
--- a/lib/tsan/rtl/tsan_report.h
+++ b/lib/tsan/rtl/tsan_report.h
@@ -38,14 +38,20 @@
   int col;
 };
 
+struct ReportMopMutex {  // One entry of ReportMop::mset.
+  u64 id;  // Printed as "M<id>" in reports.
+  bool write;  // Printed as "write" when true, "read" otherwise.
+};
+
 struct ReportMop {
   int tid;
   uptr addr;
   int size;
   bool write;
-  int nmutex;
-  int *mutex;
+  Vector<ReportMopMutex> mset;
   ReportStack *stack;
+
+  ReportMop();
 };
 
 enum ReportLocationType {
@@ -58,6 +64,8 @@
   ReportLocationType type;
   uptr addr;
   uptr size;
+  char *module;
+  uptr offset;
   int tid;
   char *name;
   char *file;
@@ -74,7 +82,8 @@
 };
 
 struct ReportMutex {
-  int id;
+  u64 id;
+  bool destroyed;
   ReportStack *stack;
 };
 
diff --git a/lib/tsan/rtl/tsan_rtl.cc b/lib/tsan/rtl/tsan_rtl.cc
index 8e1d6c7..7bb090d 100644
--- a/lib/tsan/rtl/tsan_rtl.cc
+++ b/lib/tsan/rtl/tsan_rtl.cc
@@ -84,7 +84,8 @@
   , epoch0()
   , epoch1()
   , dead_info()
-  , dead_next() {
+  , dead_next()
+  , name() {
 }
 
 static void WriteMemoryProfile(char *buf, uptr buf_size, int num) {
@@ -139,7 +140,7 @@
       flags()->profile_memory, GetPid());
   fd_t fd = internal_open(filename.data(), true);
   if (fd == kInvalidFd) {
-    TsanPrintf("Failed to open memory profile file '%s'\n", &filename[0]);
+    Printf("Failed to open memory profile file '%s'\n", &filename[0]);
     Die();
   }
   internal_start_thread(&MemoryProfileThread, (void*)(uptr)fd);
@@ -161,6 +162,10 @@
   internal_start_thread(&MemoryFlushThread, 0);
 }
 
+void MapShadow(uptr addr, uptr size) {  // Maps shadow for [addr, addr + size).
+  MmapFixedNoReserve(MemToShadow(addr), size * kShadowMultiplier);
+}  // NOTE(review): the mmap result above is not checked.
+
 void Initialize(ThreadState *thr) {
   // Thread safe because done before all threads exist.
   static bool is_initialized = false;
@@ -179,24 +184,37 @@
   InitializeMutex();
   InitializeDynamicAnnotations();
   ctx = new(ctx_placeholder) Context;
+#ifndef TSAN_GO
   InitializeShadowMemory();
+#endif
   ctx->dead_list_size = 0;
   ctx->dead_list_head = 0;
   ctx->dead_list_tail = 0;
   InitializeFlags(&ctx->flags, env);
+  // Set up the correct file descriptor for error reports.
+  if (internal_strcmp(flags()->log_path, "stdout") == 0)
+    __sanitizer_set_report_fd(kStdoutFd);
+  else if (internal_strcmp(flags()->log_path, "stderr") == 0)
+    __sanitizer_set_report_fd(kStderrFd);
+  else
+    __sanitizer_set_report_path(flags()->log_path);
   InitializeSuppressions();
 #ifndef TSAN_GO
   // Initialize external symbolizer before internal threads are started.
   const char *external_symbolizer = flags()->external_symbolizer_path;
   if (external_symbolizer != 0 && external_symbolizer[0] != '\0') {
-    InitializeExternalSymbolizer(external_symbolizer);
+    if (!InitializeExternalSymbolizer(external_symbolizer)) {
+      Printf("Failed to start external symbolizer: '%s'\n",
+             external_symbolizer);
+      Die();
+    }
   }
 #endif
   InitializeMemoryProfile();
   InitializeMemoryFlush();
 
   if (ctx->flags.verbosity)
-    TsanPrintf("***** Running under ThreadSanitizer v2 (pid %d) *****\n",
+    Printf("***** Running under ThreadSanitizer v2 (pid %d) *****\n",
                GetPid());
 
   // Initialize thread 0.
@@ -208,7 +226,7 @@
   ctx->initialized = true;
 
   if (flags()->stop_on_start) {
-    TsanPrintf("ThreadSanitizer is suspended at startup (pid %d)."
+    Printf("ThreadSanitizer is suspended at startup (pid %d)."
            " Call __tsan_resume().\n",
            GetPid());
     while (__tsan_resumed == 0);
@@ -220,6 +238,9 @@
   Context *ctx = __tsan::ctx;
   bool failed = false;
 
+  if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
+    SleepForMillis(flags()->atexit_sleep_ms);
+
   // Wait for pending reports.
   ctx->report_mtx.Lock();
   ctx->report_mtx.Unlock();
@@ -229,18 +250,19 @@
   if (ctx->nreported) {
     failed = true;
 #ifndef TSAN_GO
-    TsanPrintf("ThreadSanitizer: reported %d warnings\n", ctx->nreported);
+    Printf("ThreadSanitizer: reported %d warnings\n", ctx->nreported);
 #else
-    TsanPrintf("Found %d data race(s)\n", ctx->nreported);
+    Printf("Found %d data race(s)\n", ctx->nreported);
 #endif
   }
 
   if (ctx->nmissed_expected) {
     failed = true;
-    TsanPrintf("ThreadSanitizer: missed %d expected races\n",
+    Printf("ThreadSanitizer: missed %d expected races\n",
         ctx->nmissed_expected);
   }
 
+  StatAggregate(ctx->stat, thr->stat);
   StatOutput(ctx->stat);
   return failed ? flags()->exitcode : 0;
 }
@@ -265,13 +287,28 @@
   thr->nomalloc++;
   ScopedInRtl in_rtl;
   Lock l(&thr->trace.mtx);
-  unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % kTraceParts;
+  unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts();
   TraceHeader *hdr = &thr->trace.headers[trace];
   hdr->epoch0 = thr->fast_state.epoch();
   hdr->stack0.ObtainCurrent(thr, 0);
+  hdr->mset0 = thr->mset;
   thr->nomalloc--;
 }
 
+uptr TraceTopPC(ThreadState *thr) {  // Reads thr's current trace slot.
+  Event *events = (Event*)GetThreadTrace(thr->tid);
+  uptr pc = events[thr->fast_state.GetTracePos()];  // Raw Event value.
+  return pc;  // NOTE(review): Event type bits (61 and up) are not masked off.
+}
+
+uptr TraceSize() {  // Trace size; doubles per step of flags()->history_size.
+  return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1));
+}
+
+uptr TraceParts() {  // Number of kTracePartSize-sized parts in the trace.
+  return TraceSize() / kTracePartSize;
+}
+
 #ifndef TSAN_GO
 extern "C" void __tsan_trace_switch() {
   TraceSwitch(cur_thread());
@@ -315,11 +352,11 @@
   return !kAccessIsWrite && !s.is_write();
 }
 
-static inline bool OldIsRWStronger(Shadow old, int kAccessIsWrite) {
+static inline bool OldIsRWNotWeaker(Shadow old, int kAccessIsWrite) {
   return old.is_write() || !kAccessIsWrite;
 }
 
-static inline bool OldIsRWWeaker(Shadow old, int kAccessIsWrite) {
+static inline bool OldIsRWWeakerOrEqual(Shadow old, int kAccessIsWrite) {
   return !old.is_write() || kAccessIsWrite;
 }
 
@@ -328,12 +365,12 @@
 }
 
 static inline bool HappensBefore(Shadow old, ThreadState *thr) {
-  return thr->clock.get(old.tid()) >= old.epoch();
+  return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
 }
 
 ALWAYS_INLINE
 void MemoryAccessImpl(ThreadState *thr, uptr addr,
-    int kAccessSizeLog, bool kAccessIsWrite, FastState fast_state,
+    int kAccessSizeLog, bool kAccessIsWrite,
     u64 *shadow_mem, Shadow cur) {
   StatInc(thr, StatMop);
   StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
@@ -409,7 +446,7 @@
 void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
     int kAccessSizeLog, bool kAccessIsWrite) {
   u64 *shadow_mem = (u64*)MemToShadow(addr);
-  DPrintf2("#%d: tsan::OnMemoryAccess: @%p %p size=%d"
+  DPrintf2("#%d: MemoryAccess: @%p %p size=%d"
       " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
       (int)thr->fast_state.tid(), (void*)pc, (void*)addr,
       (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
@@ -417,11 +454,11 @@
       (uptr)shadow_mem[2], (uptr)shadow_mem[3]);
 #if TSAN_DEBUG
   if (!IsAppMem(addr)) {
-    TsanPrintf("Access to non app mem %zx\n", addr);
+    Printf("Access to non app mem %zx\n", addr);
     DCHECK(IsAppMem(addr));
   }
   if (!IsShadowMem((uptr)shadow_mem)) {
-    TsanPrintf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
+    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
     DCHECK(IsShadowMem((uptr)shadow_mem));
   }
 #endif
@@ -437,9 +474,9 @@
 
   // We must not store to the trace if we do not store to the shadow.
   // That is, this call must be moved somewhere below.
-  TraceAddEvent(thr, fast_state.epoch(), EventTypeMop, pc);
+  TraceAddEvent(thr, fast_state, EventTypeMop, pc);
 
-  MemoryAccessImpl(thr, addr, kAccessSizeLog, kAccessIsWrite, fast_state,
+  MemoryAccessImpl(thr, addr, kAccessSizeLog, kAccessIsWrite,
       shadow_mem, cur);
 }
 
@@ -488,6 +525,7 @@
 void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
   MemoryAccessRange(thr, pc, addr, size, true);
   Shadow s(thr->fast_state);
+  s.ClearIgnoreBit();
   s.MarkAsFreed();
   s.SetWrite(true);
   s.SetAddr0AndSizeLog(0, 3);
@@ -496,17 +534,19 @@
 
 void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
   Shadow s(thr->fast_state);
+  s.ClearIgnoreBit();
   s.SetWrite(true);
   s.SetAddr0AndSizeLog(0, 3);
   MemoryRangeSet(thr, pc, addr, size, s.raw());
 }
 
+ALWAYS_INLINE
 void FuncEntry(ThreadState *thr, uptr pc) {
   DCHECK_EQ(thr->in_rtl, 0);
   StatInc(thr, StatFuncEnter);
   DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void*)pc);
   thr->fast_state.IncrementEpoch();
-  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeFuncEnter, pc);
+  TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
 
   // Shadow stack maintenance can be replaced with
   // stack unwinding during trace switch (which presumably must be faster).
@@ -530,12 +570,13 @@
   thr->shadow_stack_pos++;
 }
 
+ALWAYS_INLINE
 void FuncExit(ThreadState *thr) {
   DCHECK_EQ(thr->in_rtl, 0);
   StatInc(thr, StatFuncExit);
   DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
   thr->fast_state.IncrementEpoch();
-  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeFuncExit, 0);
+  TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
 
   DCHECK_GT(thr->shadow_stack_pos, &thr->shadow_stack[0]);
 #ifndef TSAN_GO
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
index c4632c7..5d74286 100644
--- a/lib/tsan/rtl/tsan_rtl.h
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -27,7 +27,7 @@
 #define TSAN_RTL_H
 
 #include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_allocator64.h"
+#include "sanitizer_common/sanitizer_allocator.h"
 #include "tsan_clock.h"
 #include "tsan_defs.h"
 #include "tsan_flags.h"
@@ -35,6 +35,12 @@
 #include "tsan_trace.h"
 #include "tsan_vector.h"
 #include "tsan_report.h"
+#include "tsan_platform.h"
+#include "tsan_mutexset.h"
+
+#if SANITIZER_WORDSIZE != 64
+# error "ThreadSanitizer is supported only on 64-bit platforms"
+#endif
 
 namespace __tsan {
 
@@ -45,6 +51,10 @@
   u32 alloc_tid;
   u32 alloc_stack_id;
   SyncVar *head;
+
+  MBlock()  // Presumably mutex type tag + stat counter for mtx; confirm.
+    : mtx(MutexTypeMBlock, StatMtxMBlock) {
+  }
 };
 
 #ifndef TSAN_GO
@@ -57,8 +67,7 @@
 
 typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, sizeof(MBlock),
     DefaultSizeClassMap> PrimaryAllocator;
-typedef SizeClassAllocatorLocalCache<PrimaryAllocator::kNumClasses,
-    PrimaryAllocator> AllocatorCache;
+typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
 typedef LargeMmapAllocator SecondaryAllocator;
 typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
     SecondaryAllocator> Allocator;
@@ -67,21 +76,21 @@
 
 void TsanCheckFailed(const char *file, int line, const char *cond,
                      u64 v1, u64 v2);
-void TsanPrintf(const char *format, ...);
 
 // FastState (from most significant bit):
-//   unused          : 1
+//   ignore          : 1
 //   tid             : kTidBits
 //   epoch           : kClkBits
 //   unused          : -
-//   ignore_bit      : 1
+//   history_size    : 3
 class FastState {
  public:
   FastState(u64 tid, u64 epoch) {
     x_ = tid << kTidShift;
     x_ |= epoch << kClkShift;
-    DCHECK(tid == this->tid());
-    DCHECK(epoch == this->epoch());
+    DCHECK_EQ(tid, this->tid());
+    DCHECK_EQ(epoch, this->epoch());
+    DCHECK_EQ(GetIgnoreBit(), false);
   }
 
   explicit FastState(u64 x)
@@ -93,6 +102,11 @@
   }
 
   u64 tid() const {
+    u64 res = (x_ & ~kIgnoreBit) >> kTidShift;
+    return res;
+  }
+
+  u64 TidWithIgnore() const {
     u64 res = x_ >> kTidShift;
     return res;
   }
@@ -111,13 +125,34 @@
 
   void SetIgnoreBit() { x_ |= kIgnoreBit; }
   void ClearIgnoreBit() { x_ &= ~kIgnoreBit; }
-  bool GetIgnoreBit() const { return x_ & kIgnoreBit; }
+  bool GetIgnoreBit() const { return (s64)x_ < 0; }
+
+  void SetHistorySize(int hs) {  // Stores hs in the low 3 bits of x_.
+    CHECK_GE(hs, 0);  // hs must fit in 3 bits: 0..7.
+    CHECK_LE(hs, 7);
+    x_ = (x_ & ~7) | hs;  // Replace the low 3 bits, keep the rest.
+  }
+
+  int GetHistorySize() const {  // Reads the low 3 bits of x_.
+    return (int)(x_ & 7);
+  }
+
+  void ClearHistorySize() {  // Zeroes the low 3 bits of x_.
+    x_ &= ~7;
+  }
+
+  u64 GetTracePos() const {  // Low (kTracePartSizeBits + hs + 1) epoch bits.
+    const int hs = GetHistorySize();
+    // When hs == 0, the trace consists of 2 parts.
+    const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1;
+    return epoch() & mask;  // Position wraps at the mask width.
+  }
 
  private:
   friend class Shadow;
   static const int kTidShift = 64 - kTidBits - 1;
   static const int kClkShift = kTidShift - kClkBits;
-  static const u64 kIgnoreBit = 1ull;
+  static const u64 kIgnoreBit = 1ull << 63;
   static const u64 kFreedBit = 1ull << 63;
   u64 x_;
 };
@@ -131,9 +166,14 @@
 //   addr0           : 3
 class Shadow : public FastState {
  public:
-  explicit Shadow(u64 x) : FastState(x) { }
+  explicit Shadow(u64 x)
+      : FastState(x) {
+  }
 
-  explicit Shadow(const FastState &s) : FastState(s.x_) { }
+  explicit Shadow(const FastState &s)
+      : FastState(s.x_) {
+    ClearHistorySize();
+  }
 
   void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
     DCHECK_EQ(x_ & 31, 0);
@@ -155,7 +195,7 @@
 
   static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) {
     u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift;
-    DCHECK_EQ(shifted_xor == 0, s1.tid() == s2.tid());
+    DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore());
     return shifted_xor == 0;
   }
 
@@ -232,10 +272,6 @@
   }
 };
 
-// Freed memory.
-// As if 8-byte write by thread 0xff..f at epoch 0xff..f, races with everything.
-const u64 kShadowFreed = 0xfffffffffffffff8ull;
-
 struct SignalContext;
 
 // This struct is stored in TLS.
@@ -269,6 +305,7 @@
   uptr *shadow_stack;
   uptr *shadow_stack_end;
 #endif
+  MutexSet mset;
   ThreadClock clock;
 #ifndef TSAN_GO
   AllocatorCache alloc_cache;
@@ -342,6 +379,7 @@
   StackTrace creation_stack;
   ThreadDeadInfo *dead_info;
   ThreadContext *dead_next;  // In dead thread list.
+  char *name;  // As annotated by user.
 
   explicit ThreadContext(int tid);
 };
@@ -415,7 +453,8 @@
   ~ScopedReport();
 
   void AddStack(const StackTrace *stack);
-  void AddMemoryAccess(uptr addr, Shadow s, const StackTrace *stack);
+  void AddMemoryAccess(uptr addr, Shadow s, const StackTrace *stack,
+                       const MutexSet *mset);
   void AddThread(const ThreadContext *tctx);
   void AddMutex(const SyncVar *s);
   void AddLocation(uptr addr, uptr size);
@@ -427,11 +466,13 @@
   Context *ctx_;
   ReportDesc *rep_;
 
+  void AddMutex(u64 id);
+
   ScopedReport(const ScopedReport&);
   void operator = (const ScopedReport&);
 };
 
-void RestoreStack(int tid, const u64 epoch, StackTrace *stk);
+void RestoreStack(int tid, const u64 epoch, StackTrace *stk, MutexSet *mset);
 
 void StatAggregate(u64 *dst, u64 *src);
 void StatOutput(u64 *stat);
@@ -440,6 +481,7 @@
     thr->stat[typ] += n;
 }
 
+void MapShadow(uptr addr, uptr size);
 void InitializeShadowMemory();
 void InitializeInterceptors();
 void InitializeDynamicAnnotations();
@@ -454,13 +496,13 @@
 bool IsExpectedReport(uptr addr, uptr size);
 
 #if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 1
-# define DPrintf TsanPrintf
+# define DPrintf Printf
 #else
 # define DPrintf(...)
 #endif
 
 #if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 2
-# define DPrintf2 TsanPrintf
+# define DPrintf2 Printf
 #else
 # define DPrintf2(...)
 #endif
@@ -474,7 +516,7 @@
 void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
     int kAccessSizeLog, bool kAccessIsWrite);
 void MemoryAccessImpl(ThreadState *thr, uptr addr,
-    int kAccessSizeLog, bool kAccessIsWrite, FastState fast_state,
+    int kAccessSizeLog, bool kAccessIsWrite,
     u64 *shadow_mem, Shadow cur);
 void MemoryRead1Byte(ThreadState *thr, uptr pc, uptr addr);
 void MemoryWrite1Byte(ThreadState *thr, uptr pc, uptr addr);
@@ -497,7 +539,9 @@
 void ThreadJoin(ThreadState *thr, uptr pc, int tid);
 void ThreadDetach(ThreadState *thr, uptr pc, int tid);
 void ThreadFinalize(ThreadState *thr);
-void ThreadFinalizerGoroutine(ThreadState *thr);
+void ThreadSetName(ThreadState *thr, const char *name);
+int ThreadCount(ThreadState *thr);
+void ProcessPendingSignals(ThreadState *thr);
 
 void MutexCreate(ThreadState *thr, uptr pc, uptr addr,
                  bool rw, bool recursive, bool linker_init);
@@ -509,6 +553,7 @@
 void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr);
 
 void Acquire(ThreadState *thr, uptr pc, uptr addr);
+void AcquireGlobal(ThreadState *thr, uptr pc);
 void Release(ThreadState *thr, uptr pc, uptr addr);
 void ReleaseStore(ThreadState *thr, uptr pc, uptr addr);
 void AfterSleep(ThreadState *thr, uptr pc);
@@ -525,6 +570,7 @@
 #define HACKY_CALL(f) \
   __asm__ __volatile__("sub $1024, %%rsp;" \
                        "/*.cfi_adjust_cfa_offset 1024;*/" \
+                       ".hidden " #f "_thunk;" \
                        "call " #f "_thunk;" \
                        "add $1024, %%rsp;" \
                        "/*.cfi_adjust_cfa_offset -1024;*/" \
@@ -534,19 +580,27 @@
 #endif
 
 void TraceSwitch(ThreadState *thr);
+uptr TraceTopPC(ThreadState *thr);
+uptr TraceSize();
+uptr TraceParts();
 
 extern "C" void __tsan_trace_switch();
-void ALWAYS_INLINE INLINE TraceAddEvent(ThreadState *thr, u64 epoch,
-                                        EventType typ, uptr addr) {
+void ALWAYS_INLINE INLINE TraceAddEvent(ThreadState *thr, FastState fs,
+                                        EventType typ, u64 addr) {
+  DCHECK_GE((int)typ, 0);
+  DCHECK_LE((int)typ, 7);
+  DCHECK_EQ(GetLsb(addr, 61), addr);
   StatInc(thr, StatEvents);
-  if (UNLIKELY((epoch % kTracePartSize) == 0)) {
+  u64 pos = fs.GetTracePos();
+  if (UNLIKELY((pos % kTracePartSize) == 0)) {
 #ifndef TSAN_GO
     HACKY_CALL(__tsan_trace_switch);
 #else
     TraceSwitch(thr);
 #endif
   }
-  Event *evp = &thr->trace.events[epoch % kTraceSize];
+  Event *trace = (Event*)GetThreadTrace(fs.tid());
+  Event *evp = &trace[pos];
   Event ev = (u64)addr | ((u64)typ << 61);
   *evp = ev;
 }
diff --git a/lib/tsan/rtl/tsan_rtl_amd64.S b/lib/tsan/rtl/tsan_rtl_amd64.S
index aee650d..af87856 100644
--- a/lib/tsan/rtl/tsan_rtl_amd64.S
+++ b/lib/tsan/rtl/tsan_rtl_amd64.S
@@ -1,5 +1,6 @@
 .section .text
 
+.hidden __tsan_trace_switch
 .globl __tsan_trace_switch_thunk
 __tsan_trace_switch_thunk:
   .cfi_startproc
@@ -79,6 +80,7 @@
   ret
   .cfi_endproc
 
+.hidden __tsan_report_race
 .globl __tsan_report_race_thunk
 __tsan_report_race_thunk:
   .cfi_startproc
diff --git a/lib/tsan/rtl/tsan_rtl_mutex.cc b/lib/tsan/rtl/tsan_rtl_mutex.cc
index 73320a0..e5c61d0 100644
--- a/lib/tsan/rtl/tsan_rtl_mutex.cc
+++ b/lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -28,7 +28,7 @@
   StatInc(thr, StatMutexCreate);
   if (!linker_init && IsAppMem(addr))
     MemoryWrite1Byte(thr, pc, addr);
-  SyncVar *s = ctx->synctab.GetAndLock(thr, pc, addr, true);
+  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   s->is_rw = rw;
   s->is_recursive = recursive;
   s->is_linker_init = linker_init;
@@ -61,11 +61,12 @@
     trace.ObtainCurrent(thr, pc);
     rep.AddStack(&trace);
     FastState last(s->last_lock);
-    RestoreStack(last.tid(), last.epoch(), &trace);
+    RestoreStack(last.tid(), last.epoch(), &trace, 0);
     rep.AddStack(&trace);
     rep.AddLocation(s->addr, 1);
     OutputReport(ctx, rep);
   }
+  thr->mset.Remove(s->GetId());
   DestroyAndFree(s);
 }
 
@@ -74,9 +75,9 @@
   DPrintf("#%d: MutexLock %zx\n", thr->tid, addr);
   if (IsAppMem(addr))
     MemoryRead1Byte(thr, pc, addr);
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
-  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeLock, addr);
-  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId());
   if (s->owner_tid == SyncVar::kInvalidTid) {
     CHECK_EQ(s->recursion, 0);
     s->owner_tid = thr->tid;
@@ -84,7 +85,7 @@
   } else if (s->owner_tid == thr->tid) {
     CHECK_GT(s->recursion, 0);
   } else {
-    TsanPrintf("ThreadSanitizer WARNING: double lock\n");
+    Printf("ThreadSanitizer WARNING: double lock\n");
     PrintCurrentStack(thr, pc);
   }
   if (s->recursion == 0) {
@@ -98,6 +99,7 @@
     StatInc(thr, StatMutexRecLock);
   }
   s->recursion++;
+  thr->mset.Add(s->GetId(), true, thr->fast_state.epoch());
   s->mtx.Unlock();
 }
 
@@ -106,19 +108,19 @@
   DPrintf("#%d: MutexUnlock %zx\n", thr->tid, addr);
   if (IsAppMem(addr))
     MemoryRead1Byte(thr, pc, addr);
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
-  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeUnlock, addr);
-  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
   if (s->recursion == 0) {
     if (!s->is_broken) {
       s->is_broken = true;
-      TsanPrintf("ThreadSanitizer WARNING: unlock of unlocked mutex\n");
+      Printf("ThreadSanitizer WARNING: unlock of unlocked mutex\n");
       PrintCurrentStack(thr, pc);
     }
   } else if (s->owner_tid != thr->tid) {
     if (!s->is_broken) {
       s->is_broken = true;
-      TsanPrintf("ThreadSanitizer WARNING: mutex unlock by another thread\n");
+      Printf("ThreadSanitizer WARNING: mutex unlock by another thread\n");
       PrintCurrentStack(thr, pc);
     }
   } else {
@@ -134,6 +136,7 @@
       StatInc(thr, StatMutexRecUnlock);
     }
   }
+  thr->mset.Del(s->GetId(), true);
   s->mtx.Unlock();
 }
 
@@ -143,17 +146,18 @@
   StatInc(thr, StatMutexReadLock);
   if (IsAppMem(addr))
     MemoryRead1Byte(thr, pc, addr);
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, false);
   thr->fast_state.IncrementEpoch();
-  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeRLock, addr);
-  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, false);
+  TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId());
   if (s->owner_tid != SyncVar::kInvalidTid) {
-    TsanPrintf("ThreadSanitizer WARNING: read lock of a write locked mutex\n");
+    Printf("ThreadSanitizer WARNING: read lock of a write locked mutex\n");
     PrintCurrentStack(thr, pc);
   }
   thr->clock.set(thr->tid, thr->fast_state.epoch());
   thr->clock.acquire(&s->clock);
   s->last_lock = thr->fast_state.raw();
   StatInc(thr, StatSyncAcquire);
+  thr->mset.Add(s->GetId(), false, thr->fast_state.epoch());
   s->mtx.ReadUnlock();
 }
 
@@ -163,11 +167,11 @@
   StatInc(thr, StatMutexReadUnlock);
   if (IsAppMem(addr))
     MemoryRead1Byte(thr, pc, addr);
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
-  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeRUnlock, addr);
-  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
   if (s->owner_tid != SyncVar::kInvalidTid) {
-    TsanPrintf("ThreadSanitizer WARNING: read unlock of a write "
+    Printf("ThreadSanitizer WARNING: read unlock of a write "
                "locked mutex\n");
     PrintCurrentStack(thr, pc);
   }
@@ -176,6 +180,7 @@
   thr->clock.release(&s->read_clock);
   StatInc(thr, StatSyncRelease);
+  thr->mset.Del(s->GetId(), false);  // Read s while it is still locked.
   s->mtx.Unlock();
 }
 
 void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) {
@@ -183,18 +188,22 @@
   DPrintf("#%d: MutexReadOrWriteUnlock %zx\n", thr->tid, addr);
   if (IsAppMem(addr))
     MemoryRead1Byte(thr, pc, addr);
-  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
+  bool write = true;
   if (s->owner_tid == SyncVar::kInvalidTid) {
     // Seems to be read unlock.
+    write = false;
     StatInc(thr, StatMutexReadUnlock);
     thr->fast_state.IncrementEpoch();
-    TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeRUnlock, addr);
+    TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
     thr->clock.set(thr->tid, thr->fast_state.epoch());
     thr->fast_synch_epoch = thr->fast_state.epoch();
     thr->clock.release(&s->read_clock);
     StatInc(thr, StatSyncRelease);
   } else if (s->owner_tid == thr->tid) {
     // Seems to be write unlock.
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
     CHECK_GT(s->recursion, 0);
     s->recursion--;
     if (s->recursion == 0) {
@@ -204,8 +213,6 @@
       // The sequence of events is quite tricky and doubled in several places.
       // First, it's a bug to increment the epoch w/o writing to the trace.
       // Then, the acquire/release logic can be factored out as well.
-      thr->fast_state.IncrementEpoch();
-      TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeUnlock, addr);
       thr->clock.set(thr->tid, thr->fast_state.epoch());
       thr->fast_synch_epoch = thr->fast_state.epoch();
       thr->clock.ReleaseStore(&s->clock);
@@ -215,26 +222,41 @@
     }
   } else if (!s->is_broken) {
     s->is_broken = true;
-    TsanPrintf("ThreadSanitizer WARNING: mutex unlock by another thread\n");
+    Printf("ThreadSanitizer WARNING: mutex unlock by another thread\n");
     PrintCurrentStack(thr, pc);
   }
+  thr->mset.Del(s->GetId(), write);
   s->mtx.Unlock();
 }
 
 void Acquire(ThreadState *thr, uptr pc, uptr addr) {
   CHECK_GT(thr->in_rtl, 0);
   DPrintf("#%d: Acquire %zx\n", thr->tid, addr);
-  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, false);
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, false);
   thr->clock.set(thr->tid, thr->fast_state.epoch());
   thr->clock.acquire(&s->clock);
   StatInc(thr, StatSyncAcquire);
   s->mtx.ReadUnlock();
 }
 
+// Sets thr's clock entry for every existing thread context (under thread_mtx).
+void AcquireGlobal(ThreadState *thr, uptr pc) {
+  Context *ctx = CTX();
+  Lock l(&ctx->thread_mtx);
+  for (unsigned tid = 0; tid < kMaxTid; tid++) {
+    ThreadContext *tctx = ctx->threads[tid];
+    if (tctx == 0)
+      continue;
+    const bool running = (tctx->status == ThreadStatusRunning);
+    const u64 epoch = running ? tctx->thr->fast_state.epoch() : tctx->epoch1;
+    thr->clock.set(tid, epoch);
+  }
+}
+
 void Release(ThreadState *thr, uptr pc, uptr addr) {
   CHECK_GT(thr->in_rtl, 0);
   DPrintf("#%d: Release %zx\n", thr->tid, addr);
-  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   thr->clock.set(thr->tid, thr->fast_state.epoch());
   thr->clock.release(&s->clock);
   StatInc(thr, StatSyncRelease);
@@ -244,7 +266,7 @@
 void ReleaseStore(ThreadState *thr, uptr pc, uptr addr) {
   CHECK_GT(thr->in_rtl, 0);
   DPrintf("#%d: ReleaseStore %zx\n", thr->tid, addr);
-  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  SyncVar *s = CTX()->synctab.GetOrCreateAndLock(thr, pc, addr, true);
   thr->clock.set(thr->tid, thr->fast_state.epoch());
   thr->clock.ReleaseStore(&s->clock);
   StatInc(thr, StatSyncRelease);
diff --git a/lib/tsan/rtl/tsan_rtl_report.cc b/lib/tsan/rtl/tsan_rtl_report.cc
index 34eb450..c4dcdfb 100644
--- a/lib/tsan/rtl/tsan_rtl_report.cc
+++ b/lib/tsan/rtl/tsan_rtl_report.cc
@@ -14,6 +14,7 @@
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_common.h"
 #include "tsan_platform.h"
 #include "tsan_rtl.h"
 #include "tsan_suppressions.h"
@@ -25,12 +26,14 @@
 
 namespace __tsan {
 
+using namespace __sanitizer;  // NOLINT
+
 void TsanCheckFailed(const char *file, int line, const char *cond,
                      u64 v1, u64 v2) {
   ScopedInRtl in_rtl;
-  TsanPrintf("FATAL: ThreadSanitizer CHECK failed: "
-             "%s:%d \"%s\" (0x%zx, 0x%zx)\n",
-             file, line, cond, (uptr)v1, (uptr)v2);
+  Printf("FATAL: ThreadSanitizer CHECK failed: "
+         "%s:%d \"%s\" (0x%zx, 0x%zx)\n",
+         file, line, cond, (uptr)v1, (uptr)v2);
   Die();
 }
 
@@ -125,8 +128,7 @@
 
 ScopedReport::~ScopedReport() {
   ctx_->report_mtx.Unlock();
-  rep_->~ReportDesc();
-  internal_free(rep_);
+  DestroyAndFree(rep_);
 }
 
 void ScopedReport::AddStack(const StackTrace *stack) {
@@ -135,7 +137,7 @@
 }
 
 void ScopedReport::AddMemoryAccess(uptr addr, Shadow s,
-                                   const StackTrace *stack) {
+    const StackTrace *stack, const MutexSet *mset) {
   void *mem = internal_alloc(MBlockReportMop, sizeof(ReportMop));
   ReportMop *mop = new(mem) ReportMop;
   rep_->mops.PushBack(mop);
@@ -143,8 +145,27 @@
   mop->addr = addr + s.addr0();
   mop->size = s.size();
   mop->write = s.is_write();
-  mop->nmutex = 0;
   mop->stack = SymbolizeStack(*stack);
+  for (uptr i = 0; i < mset->Size(); i++) {
+    MutexSet::Desc d = mset->Get(i);
+    u64 uid = 0;
+    uptr addr = SyncVar::SplitId(d.id, &uid);
+    SyncVar *s = ctx_->synctab.GetIfExistsAndLock(addr, false);
+    // Check that the mutex is still alive.
+    // Another mutex can be created at the same address,
+    // so check uid as well.
+    if (s && s->CheckId(uid)) {
+      ReportMopMutex mtx = {s->uid, d.write};
+      mop->mset.PushBack(mtx);
+      AddMutex(s);
+    } else {
+      ReportMopMutex mtx = {d.id, d.write};
+      mop->mset.PushBack(mtx);
+      AddMutex(d.id);
+    }
+    if (s)
+      s->mtx.ReadUnlock();
+  }
 }
 
 void ScopedReport::AddThread(const ThreadContext *tctx) {
@@ -158,6 +179,7 @@
   rt->id = tctx->tid;
   rt->pid = tctx->os_id;
   rt->running = (tctx->status == ThreadStatusRunning);
+  rt->name = tctx->name ? internal_strdup(tctx->name) : 0;
   rt->stack = SymbolizeStack(tctx->creation_stack);
 }
 
@@ -175,13 +197,31 @@
 #endif
 
 void ScopedReport::AddMutex(const SyncVar *s) {
+  for (uptr i = 0; i < rep_->mutexes.Size(); i++) {
+    if (rep_->mutexes[i]->id == s->uid)
+      return;
+  }
   void *mem = internal_alloc(MBlockReportMutex, sizeof(ReportMutex));
   ReportMutex *rm = new(mem) ReportMutex();
   rep_->mutexes.PushBack(rm);
-  rm->id = 42;
+  rm->id = s->uid;
+  rm->destroyed = false;
   rm->stack = SymbolizeStack(s->creation_stack);
 }
 
+// Records a destroyed mutex (no creation stack) unless `id` is already listed.
+void ScopedReport::AddMutex(u64 id) {
+  for (uptr idx = 0, n = rep_->mutexes.Size(); idx < n; idx++)
+    if (rep_->mutexes[idx]->id == id)
+      return;
+  ReportMutex *entry =
+      new(internal_alloc(MBlockReportMutex, sizeof(ReportMutex))) ReportMutex();
+  entry->id = id;
+  entry->destroyed = true;
+  entry->stack = 0;
+  rep_->mutexes.PushBack(entry);
+}
+
 void ScopedReport::AddLocation(uptr addr, uptr size) {
   if (addr == 0)
     return;
@@ -220,9 +260,11 @@
     loc->type = ReportLocationGlobal;
     loc->addr = addr;
     loc->size = size;
+    loc->module = symb->module ? internal_strdup(symb->module) : 0;
+    loc->offset = symb->offset;
     loc->tid = 0;
-    loc->name = symb->func;
-    loc->file = symb->file;
+    loc->name = symb->func ? internal_strdup(symb->func) : 0;
+    loc->file = symb->file ? internal_strdup(symb->file) : 0;
     loc->line = symb->line;
     loc->stack = 0;
     internal_free(symb);
@@ -246,7 +288,10 @@
   return rep_;
 }
 
-void RestoreStack(int tid, const u64 epoch, StackTrace *stk) {
+void RestoreStack(int tid, const u64 epoch, StackTrace *stk, MutexSet *mset) {
+  // This function restores stack trace and mutex set for the thread/epoch.
+  // It does so by getting stack trace and mutex set at the beginning of
+  // trace part, and then replaying the trace till the given epoch.
   ThreadContext *tctx = CTX()->threads[tid];
   if (tctx == 0)
     return;
@@ -263,12 +308,13 @@
     return;
   }
   Lock l(&trace->mtx);
-  const int partidx = (epoch / (kTraceSize / kTraceParts)) % kTraceParts;
+  const int partidx = (epoch / kTracePartSize) % TraceParts();
   TraceHeader* hdr = &trace->headers[partidx];
   if (epoch < hdr->epoch0)
     return;
-  const u64 eend = epoch % kTraceSize;
-  const u64 ebegin = eend / kTracePartSize * kTracePartSize;
+  const u64 epoch0 = RoundDown(epoch, TraceSize());
+  const u64 eend = epoch % TraceSize();
+  const u64 ebegin = RoundDown(eend, kTracePartSize);
   DPrintf("#%d: RestoreStack epoch=%zu ebegin=%zu eend=%zu partidx=%d\n",
           tid, (uptr)epoch, (uptr)ebegin, (uptr)eend, partidx);
   InternalScopedBuffer<uptr> stack(1024);  // FIXME: de-hardcode 1024
@@ -276,11 +322,14 @@
     stack[i] = hdr->stack0.Get(i);
     DPrintf2("  #%02lu: pc=%zx\n", i, stack[i]);
   }
+  if (mset)
+    *mset = hdr->mset0;
   uptr pos = hdr->stack0.Size();
+  Event *events = (Event*)GetThreadTrace(tid);
   for (uptr i = ebegin; i <= eend; i++) {
-    Event ev = trace->events[i];
+    Event ev = events[i];
     EventType typ = (EventType)(ev >> 61);
-    uptr pc = (uptr)(ev & 0xffffffffffffull);
+    uptr pc = (uptr)(ev & ((1ull << 61) - 1));
     DPrintf2("  %zu typ=%d pc=%zx\n", i, typ, pc);
     if (typ == EventTypeMop) {
       stack[pos] = pc;
@@ -290,6 +339,17 @@
       if (pos > 0)
         pos--;
     }
+    if (mset) {
+      if (typ == EventTypeLock) {
+        mset->Add(pc, true, epoch0 + i);
+      } else if (typ == EventTypeUnlock) {
+        mset->Del(pc, true);
+      } else if (typ == EventTypeRLock) {
+        mset->Add(pc, false, epoch0 + i);
+      } else if (typ == EventTypeRUnlock) {
+        mset->Del(pc, false);
+      }
+    }
     for (uptr j = 0; j <= pos; j++)
       DPrintf2("      #%zu: %zx\n", j, stack[j]);
   }
@@ -384,7 +444,45 @@
   return false;
 }
 
+// On programs that use Java we see weird reports like:
+// WARNING: ThreadSanitizer: data race (pid=22512)
+//   Read of size 8 at 0x7d2b00084318 by thread 100:
+//     #0 memcpy tsan_interceptors.cc:406 (foo+0x00000d8dfae3)
+//     #1 <null> <null>:0 (0x7f7ad9b40193)
+//   Previous write of size 8 at 0x7d2b00084318 by thread 105:
+//     #0 strncpy tsan_interceptors.cc:501 (foo+0x00000d8e0919)
+//     #1 <null> <null>:0 (0x7f7ad9b42707)
+static bool IsJavaNonsense(const ReportDesc *rep) {
+  static const char *const kTopFrames[] = {
+    "memset", "memcpy", "memmove", "strcmp",
+    "strncpy", "strlen", "free", "pthread_mutex_lock",
+  };
+  const uptr kTopFramesCnt = sizeof(kTopFrames) / sizeof(kTopFrames[0]);
+  for (uptr i = 0; i < rep->mops.Size(); i++) {
+    ReportStack *top = rep->mops[i]->stack;
+    if (top == 0 || top->func == 0)
+      continue;
+    bool known = false;
+    for (uptr j = 0; j < kTopFramesCnt && !known; j++)
+      known = internal_strcmp(top->func, kTopFrames[j]) == 0;
+    if (!known)
+      continue;
+    ReportStack *caller = top->next;
+    if (caller == 0)
+      return true;
+    if (caller->func || caller->file || caller->line || caller->module)
+      continue;
+    // Caller is entirely unsymbolized: record its pc as a fired suppression.
+    FiredSuppression supp = {rep->typ, caller->pc};
+    CTX()->fired_suppressions.PushBack(supp);
+    return true;
+  }
+  return false;
+}
+
 void ReportRace(ThreadState *thr) {
+  if (!flags()->report_bugs)
+    return;
   ScopedInRtl in_rtl;
 
   bool freed = false;
@@ -414,22 +512,27 @@
   ScopedReport rep(freed ? ReportTypeUseAfterFree : ReportTypeRace);
   const uptr kMop = 2;
   StackTrace traces[kMop];
-  const uptr toppc = thr->trace.events[thr->fast_state.epoch() % kTraceSize]
-      & ((1ull << 61) - 1);
+  const uptr toppc = TraceTopPC(thr);
   traces[0].ObtainCurrent(thr, toppc);
   if (IsFiredSuppression(ctx, rep, traces[0]))
     return;
+  InternalScopedBuffer<MutexSet> mset2(1);
+  new(mset2.data()) MutexSet();
   Shadow s2(thr->racy_state[1]);
-  RestoreStack(s2.tid(), s2.epoch(), &traces[1]);
+  RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2.data());
 
   if (HandleRacyStacks(thr, traces, addr_min, addr_max))
     return;
 
   for (uptr i = 0; i < kMop; i++) {
     Shadow s(thr->racy_state[i]);
-    rep.AddMemoryAccess(addr, s, &traces[i]);
+    rep.AddMemoryAccess(addr, s, &traces[i],
+                        i == 0 ? &thr->mset : mset2.data());
   }
 
+  if (flags()->suppress_java && IsJavaNonsense(rep.GetReport()))
+    return;
+
   for (uptr i = 0; i < kMop; i++) {
     FastState s(thr->racy_state[i]);
     ThreadContext *tctx = ctx->threads[s.tid()];
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
index c52dd82..2277a08 100644
--- a/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -52,6 +52,23 @@
   }
 }
 
+// Counts thread contexts in Created or Running state (under thread_mtx).
+int ThreadCount(ThreadState *thr) {
+  CHECK_GT(thr->in_rtl, 0);
+  Context *ctx = CTX();
+  Lock l(&ctx->thread_mtx);
+  int live = 0;
+  for (unsigned tid = 0; tid < kMaxTid; tid++) {
+    const ThreadContext *tctx = ctx->threads[tid];
+    if (tctx == 0)
+      continue;
+    if (tctx->status == ThreadStatusCreated
+        || tctx->status == ThreadStatusRunning)
+      live++;
+  }
+  return live;
+}
+
 static void ThreadDead(ThreadState *thr, ThreadContext *tctx) {
   Context *ctx = CTX();
   CHECK_GT(thr->in_rtl, 0);
@@ -81,8 +98,9 @@
   ThreadContext *tctx = 0;
   if (ctx->dead_list_size > kThreadQuarantineSize
       || ctx->thread_seq >= kMaxTid) {
+    // Reusing old thread descriptor and tid.
     if (ctx->dead_list_size == 0) {
-      TsanPrintf("ThreadSanitizer: %d thread limit exceeded. Dying.\n",
+      Printf("ThreadSanitizer: %d thread limit exceeded. Dying.\n",
                  kMaxTid);
       Die();
     }
@@ -100,12 +118,18 @@
     tctx->sync.Reset();
     tid = tctx->tid;
     DestroyAndFree(tctx->dead_info);
+    if (tctx->name) {
+      internal_free(tctx->name);
+      tctx->name = 0;
+    }
   } else {
+    // Allocating new thread descriptor and tid.
     StatInc(thr, StatThreadMaxTid);
     tid = ctx->thread_seq++;
     void *mem = internal_alloc(MBlockThreadContex, sizeof(ThreadContext));
     tctx = new(mem) ThreadContext(tid);
     ctx->threads[tid] = tctx;
+    MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event));
   }
   CHECK_NE(tctx, 0);
   CHECK_GE(tid, 0);
@@ -126,12 +150,11 @@
   if (tid) {
     thr->fast_state.IncrementEpoch();
     // Can't increment epoch w/o writing to the trace as well.
-    TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeMop, 0);
+    TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
     thr->clock.set(thr->tid, thr->fast_state.epoch());
     thr->fast_synch_epoch = thr->fast_state.epoch();
     thr->clock.release(&tctx->sync);
     StatInc(thr, StatSyncRelease);
-
     tctx->creation_stack.ObtainCurrent(thr, pc);
   }
   return tid;
@@ -170,7 +193,9 @@
   CHECK_EQ(tctx->status, ThreadStatusCreated);
   tctx->status = ThreadStatusRunning;
   tctx->os_id = os_id;
-  tctx->epoch0 = tctx->epoch1 + 1;
+  // RoundUp so that one trace part does not contain events
+  // from different threads.
+  tctx->epoch0 = RoundUp(tctx->epoch1 + 1, kTracePartSize);
   tctx->epoch1 = (u64)-1;
   new(thr) ThreadState(CTX(), tid, tctx->unique_id,
       tctx->epoch0, stk_addr, stk_size,
@@ -187,6 +212,9 @@
   thr->fast_synch_epoch = tctx->epoch0;
   thr->clock.set(tid, tctx->epoch0);
   thr->clock.acquire(&tctx->sync);
+  thr->fast_state.SetHistorySize(flags()->history_size);
+  const uptr trace = (tctx->epoch0 / kTracePartSize) % TraceParts();
+  thr->trace.headers[trace].epoch0 = tctx->epoch0;
   StatInc(thr, StatSyncAcquire);
   DPrintf("#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx "
           "tls_addr=%zx tls_size=%zx\n",
@@ -221,7 +249,7 @@
   } else {
     thr->fast_state.IncrementEpoch();
     // Can't increment epoch w/o writing to the trace as well.
-    TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeMop, 0);
+    TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
     thr->clock.set(thr->tid, thr->fast_state.epoch());
     thr->fast_synch_epoch = thr->fast_state.epoch();
     thr->clock.release(&tctx->sync);
@@ -232,9 +260,8 @@
   // Save from info about the thread.
   tctx->dead_info = new(internal_alloc(MBlockDeadInfo, sizeof(ThreadDeadInfo)))
       ThreadDeadInfo();
-  internal_memcpy(&tctx->dead_info->trace.events[0],
-      &thr->trace.events[0], sizeof(thr->trace.events));
-  for (int i = 0; i < kTraceParts; i++) {
+  for (uptr i = 0; i < TraceParts(); i++) {
+    tctx->dead_info->trace.headers[i].epoch0 = thr->trace.headers[i].epoch0;
     tctx->dead_info->trace.headers[i].stack0.CopyFrom(
         thr->trace.headers[i].stack0);
   }
@@ -275,9 +302,10 @@
   Lock l(&ctx->thread_mtx);
   ThreadContext *tctx = ctx->threads[tid];
   if (tctx->status == ThreadStatusInvalid) {
-    TsanPrintf("ThreadSanitizer: join of non-existent thread\n");
+    Printf("ThreadSanitizer: join of non-existent thread\n");
     return;
   }
+  // FIXME(dvyukov): print message and continue (it's user error).
   CHECK_EQ(tctx->detached, false);
   CHECK_EQ(tctx->status, ThreadStatusFinished);
   thr->clock.acquire(&tctx->sync);
@@ -293,7 +321,7 @@
   Lock l(&ctx->thread_mtx);
   ThreadContext *tctx = ctx->threads[tid];
   if (tctx->status == ThreadStatusInvalid) {
-    TsanPrintf("ThreadSanitizer: detach of non-existent thread\n");
+    Printf("ThreadSanitizer: detach of non-existent thread\n");
     return;
   }
   if (tctx->status == ThreadStatusFinished) {
@@ -303,8 +331,18 @@
   }
 }
 
-void ThreadFinalizerGoroutine(ThreadState *thr) {
-  thr->clock.Disable(thr->tid);
+void ThreadSetName(ThreadState *thr, const char *name) {
+  Context *ctx = CTX();
+  Lock l(&ctx->thread_mtx);
+  ThreadContext *tctx = ctx->threads[thr->tid];
+  CHECK_NE(tctx, 0);
+  CHECK_EQ(tctx->status, ThreadStatusRunning);
+  if (tctx->name) {
+    internal_free(tctx->name);
+    tctx->name = 0;
+  }
+  if (name)
+    tctx->name = internal_strdup(name);
 }
 
 void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
@@ -319,19 +357,19 @@
 
 #if TSAN_DEBUG
   if (!IsAppMem(addr)) {
-    TsanPrintf("Access to non app mem %zx\n", addr);
+    Printf("Access to non app mem %zx\n", addr);
     DCHECK(IsAppMem(addr));
   }
   if (!IsAppMem(addr + size - 1)) {
-    TsanPrintf("Access to non app mem %zx\n", addr + size - 1);
+    Printf("Access to non app mem %zx\n", addr + size - 1);
     DCHECK(IsAppMem(addr + size - 1));
   }
   if (!IsShadowMem((uptr)shadow_mem)) {
-    TsanPrintf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
+    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
     DCHECK(IsShadowMem((uptr)shadow_mem));
   }
   if (!IsShadowMem((uptr)(shadow_mem + size * kShadowCnt / 8 - 1))) {
-    TsanPrintf("Bad shadow addr %p (%zx)\n",
+    Printf("Bad shadow addr %p (%zx)\n",
                shadow_mem + size * kShadowCnt / 8 - 1, addr + size - 1);
     DCHECK(IsShadowMem((uptr)(shadow_mem + size * kShadowCnt / 8 - 1)));
   }
@@ -345,7 +383,7 @@
 
   fast_state.IncrementEpoch();
   thr->fast_state = fast_state;
-  TraceAddEvent(thr, fast_state.epoch(), EventTypeMop, pc);
+  TraceAddEvent(thr, fast_state, EventTypeMop, pc);
 
   bool unaligned = (addr % kShadowCell) != 0;
 
@@ -355,7 +393,7 @@
     Shadow cur(fast_state);
     cur.SetWrite(is_write);
     cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
-    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, fast_state,
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write,
         shadow_mem, cur);
   }
   if (unaligned)
@@ -366,7 +404,7 @@
     Shadow cur(fast_state);
     cur.SetWrite(is_write);
     cur.SetAddr0AndSizeLog(0, kAccessSizeLog);
-    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, fast_state,
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write,
         shadow_mem, cur);
     shadow_mem += kShadowCnt;
   }
@@ -376,7 +414,7 @@
     Shadow cur(fast_state);
     cur.SetWrite(is_write);
     cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
-    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, fast_state,
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write,
         shadow_mem, cur);
   }
 }
diff --git a/lib/tsan/rtl/tsan_stat.cc b/lib/tsan/rtl/tsan_stat.cc
index a7c33a5..6053d28 100644
--- a/lib/tsan/rtl/tsan_stat.cc
+++ b/lib/tsan/rtl/tsan_stat.cc
@@ -77,6 +77,11 @@
   name[StatAtomicStore]                  = "            store                 ";
   name[StatAtomicExchange]               = "            exchange              ";
   name[StatAtomicFetchAdd]               = "            fetch_add             ";
+  name[StatAtomicFetchSub]               = "            fetch_sub             ";
+  name[StatAtomicFetchAnd]               = "            fetch_and             ";
+  name[StatAtomicFetchOr]                = "            fetch_or              ";
+  name[StatAtomicFetchXor]               = "            fetch_xor             ";
+  name[StatAtomicFetchNand]              = "            fetch_nand            ";
   name[StatAtomicCAS]                    = "            compare_exchange      ";
   name[StatAtomicFence]                  = "            fence                 ";
   name[StatAtomicRelaxed]                = "  Including relaxed               ";
@@ -89,11 +94,13 @@
   name[StatAtomic2]                      = "            size 2                ";
   name[StatAtomic4]                      = "            size 4                ";
   name[StatAtomic8]                      = "            size 8                ";
+  name[StatAtomic16]                     = "            size 16               ";
 
   name[StatInterceptor]                  = "Interceptors                      ";
   name[StatInt_longjmp]                  = "  longjmp                         ";
   name[StatInt_siglongjmp]               = "  siglongjmp                      ";
   name[StatInt_malloc]                   = "  malloc                          ";
+  name[StatInt___libc_memalign]          = "  __libc_memalign                 ";
   name[StatInt_calloc]                   = "  calloc                          ";
   name[StatInt_realloc]                  = "  realloc                         ";
   name[StatInt_free]                     = "  free                            ";
@@ -131,6 +138,7 @@
   name[StatInt_atexit]                   = "  atexit                          ";
   name[StatInt___cxa_guard_acquire]      = "  __cxa_guard_acquire             ";
   name[StatInt___cxa_guard_release]      = "  __cxa_guard_release             ";
+  name[StatInt___cxa_guard_abort]        = "  __cxa_guard_abort               ";
   name[StatInt_pthread_create]           = "  pthread_create                  ";
   name[StatInt_pthread_join]             = "  pthread_join                    ";
   name[StatInt_pthread_detach]           = "  pthread_detach                  ";
@@ -173,6 +181,21 @@
   name[StatInt_sem_timedwait]            = "  sem_timedwait                   ";
   name[StatInt_sem_post]                 = "  sem_post                        ";
   name[StatInt_sem_getvalue]             = "  sem_getvalue                    ";
+  name[StatInt_open]                     = "  open                            ";
+  name[StatInt_creat]                    = "  creat                           ";
+  name[StatInt_dup]                      = "  dup                             ";
+  name[StatInt_dup2]                     = "  dup2                            ";
+  name[StatInt_dup3]                     = "  dup3                            ";
+  name[StatInt_eventfd]                  = "  eventfd                         ";
+  name[StatInt_socket]                   = "  socket                          ";
+  name[StatInt_connect]                  = "  connect                         ";
+  name[StatInt_accept]                   = "  accept                          ";
+  name[StatInt_accept4]                  = "  accept4                         ";
+  name[StatInt_epoll_create]             = "  epoll_create                    ";
+  name[StatInt_epoll_create1]            = "  epoll_create1                   ";
+  name[StatInt_close]                    = "  close                           ";
+  name[StatInt_pipe]                     = "  pipe                            ";
+  name[StatInt_pipe2]                    = "  pipe2                           ";
   name[StatInt_read]                     = "  read                            ";
   name[StatInt_pread]                    = "  pread                           ";
   name[StatInt_pread64]                  = "  pread64                         ";
@@ -196,7 +219,12 @@
   name[StatInt_opendir]                  = "  opendir                         ";
   name[StatInt_epoll_ctl]                = "  epoll_ctl                       ";
   name[StatInt_epoll_wait]               = "  epoll_wait                      ";
+  name[StatInt_poll]                     = "  poll                            ";
   name[StatInt_sigaction]                = "  sigaction                       ";
+  name[StatInt_sleep]                    = "  sleep                           ";
+  name[StatInt_usleep]                   = "  usleep                          ";
+  name[StatInt_nanosleep]                = "  nanosleep                       ";
+  name[StatInt_gettimeofday]             = "  gettimeofday                    ";
 
   name[StatAnnotation]                   = "Dynamic annotations               ";
   name[StatAnnotateHappensBefore]        = "  HappensBefore                   ";
@@ -240,10 +268,11 @@
   name[StatMtxSlab]                      = "  Slab                            ";
   name[StatMtxAtExit]                    = "  Atexit                          ";
   name[StatMtxAnnotations]               = "  Annotations                     ";
+  name[StatMtxMBlock]                    = "  MBlock                          ";
 
-  TsanPrintf("Statistics:\n");
+  Printf("Statistics:\n");
   for (int i = 0; i < StatCnt; i++)
-    TsanPrintf("%s: %zu\n", name[i], (uptr)stat[i]);
+    Printf("%s: %zu\n", name[i], (uptr)stat[i]);
 }
 
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_stat.h b/lib/tsan/rtl/tsan_stat.h
index 16c43f1..b144ba7 100644
--- a/lib/tsan/rtl/tsan_stat.h
+++ b/lib/tsan/rtl/tsan_stat.h
@@ -77,6 +77,7 @@
   StatAtomicFetchAnd,
   StatAtomicFetchOr,
   StatAtomicFetchXor,
+  StatAtomicFetchNand,
   StatAtomicCAS,
   StatAtomicFence,
   StatAtomicRelaxed,
@@ -89,12 +90,14 @@
   StatAtomic2,
   StatAtomic4,
   StatAtomic8,
+  StatAtomic16,
 
   // Interceptors.
   StatInterceptor,
   StatInt_longjmp,
   StatInt_siglongjmp,
   StatInt_malloc,
+  StatInt___libc_memalign,
   StatInt_calloc,
   StatInt_realloc,
   StatInt_free,
@@ -132,6 +135,7 @@
   StatInt_atexit,
   StatInt___cxa_guard_acquire,
   StatInt___cxa_guard_release,
+  StatInt___cxa_guard_abort,
   StatInt_pthread_create,
   StatInt_pthread_join,
   StatInt_pthread_detach,
@@ -172,6 +176,21 @@
   StatInt_sem_timedwait,
   StatInt_sem_post,
   StatInt_sem_getvalue,
+  StatInt_open,
+  StatInt_creat,
+  StatInt_dup,
+  StatInt_dup2,
+  StatInt_dup3,
+  StatInt_eventfd,
+  StatInt_socket,
+  StatInt_connect,
+  StatInt_accept,
+  StatInt_accept4,
+  StatInt_epoll_create,
+  StatInt_epoll_create1,
+  StatInt_close,
+  StatInt_pipe,
+  StatInt_pipe2,
   StatInt_read,
   StatInt_pread,
   StatInt_pread64,
@@ -195,6 +214,7 @@
   StatInt_opendir,
   StatInt_epoll_ctl,
   StatInt_epoll_wait,
+  StatInt_poll,
   StatInt_sigaction,
   StatInt_signal,
   StatInt_raise,
@@ -203,6 +223,7 @@
   StatInt_sleep,
   StatInt_usleep,
   StatInt_nanosleep,
+  StatInt_gettimeofday,
 
   // Dynamic annotations.
   StatAnnotation,
@@ -249,6 +270,7 @@
   StatMtxSlab,
   StatMtxAnnotations,
   StatMtxAtExit,
+  StatMtxMBlock,
 
   // This must be the last.
   StatCnt
diff --git a/lib/tsan/rtl/tsan_suppressions.cc b/lib/tsan/rtl/tsan_suppressions.cc
index acb95e5..5316f6d 100644
--- a/lib/tsan/rtl/tsan_suppressions.cc
+++ b/lib/tsan/rtl/tsan_suppressions.cc
@@ -27,25 +27,25 @@
   if (filename == 0 || filename[0] == 0)
     return 0;
   InternalScopedBuffer<char> tmp(4*1024);
-  if (filename[0] == '/')
+  if (filename[0] == '/' || GetPwd() == 0)
     internal_snprintf(tmp.data(), tmp.size(), "%s", filename);
   else
     internal_snprintf(tmp.data(), tmp.size(), "%s/%s", GetPwd(), filename);
   fd_t fd = internal_open(tmp.data(), false);
   if (fd == kInvalidFd) {
-    TsanPrintf("ThreadSanitizer: failed to open suppressions file '%s'\n",
+    Printf("ThreadSanitizer: failed to open suppressions file '%s'\n",
                tmp.data());
     Die();
   }
   const uptr fsize = internal_filesize(fd);
   if (fsize == (uptr)-1) {
-    TsanPrintf("ThreadSanitizer: failed to stat suppressions file '%s'\n",
+    Printf("ThreadSanitizer: failed to stat suppressions file '%s'\n",
                tmp.data());
     Die();
   }
   char *buf = (char*)internal_alloc(MBlockSuppression, fsize + 1);
   if (fsize != internal_read(fd, buf, fsize)) {
-    TsanPrintf("ThreadSanitizer: failed to read suppressions file '%s'\n",
+    Printf("ThreadSanitizer: failed to read suppressions file '%s'\n",
                tmp.data());
     Die();
   }
@@ -110,7 +110,7 @@
         stype = SuppressionSignal;
         line += sizeof("signal:") - 1;
       } else {
-        TsanPrintf("ThreadSanitizer: failed to parse suppressions file\n");
+        Printf("ThreadSanitizer: failed to parse suppressions file\n");
         Die();
       }
       Suppression *s = (Suppression*)internal_alloc(MBlockSuppression,
diff --git a/lib/tsan/rtl/tsan_symbolize.cc b/lib/tsan/rtl/tsan_symbolize.cc
index 7867179..48bee67 100644
--- a/lib/tsan/rtl/tsan_symbolize.cc
+++ b/lib/tsan/rtl/tsan_symbolize.cc
@@ -52,7 +52,7 @@
 }
 
 ReportStack *SymbolizeCode(uptr addr) {
-  if (0 != internal_strcmp(flags()->external_symbolizer_path, "")) {
+  if (flags()->external_symbolizer_path[0]) {
     static const uptr kMaxAddrFrames = 16;
     InternalScopedBuffer<AddressInfo> addr_frames(kMaxAddrFrames);
     for (uptr i = 0; i < kMaxAddrFrames; i++)
@@ -79,6 +79,12 @@
 }
 
 ReportStack *SymbolizeData(uptr addr) {
+  if (flags()->external_symbolizer_path[0]) {
+    AddressInfo frame;
+    if (!__sanitizer::SymbolizeData(addr, &frame))
+      return 0;
+    return NewReportStackEntry(frame);
+  }
   return SymbolizeDataAddr2Line(addr);
 }
 
diff --git a/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc b/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc
index 7d0fddb..fc7144e 100644
--- a/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc
+++ b/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc
@@ -50,17 +50,17 @@
 static void NOINLINE InitModule(ModuleDesc *m) {
   int outfd[2] = {};
   if (pipe(&outfd[0])) {
-    TsanPrintf("ThreadSanitizer: outfd pipe() failed (%d)\n", errno);
+    Printf("ThreadSanitizer: outfd pipe() failed (%d)\n", errno);
     Die();
   }
   int infd[2] = {};
   if (pipe(&infd[0])) {
-    TsanPrintf("ThreadSanitizer: infd pipe() failed (%d)\n", errno);
+    Printf("ThreadSanitizer: infd pipe() failed (%d)\n", errno);
     Die();
   }
   int pid = fork();
   if (pid == 0) {
-    flags()->log_fileno = STDERR_FILENO;
+    __sanitizer_set_report_fd(STDERR_FILENO);
     internal_close(STDOUT_FILENO);
     internal_close(STDIN_FILENO);
     internal_dup2(outfd[0], STDIN_FILENO);
@@ -74,7 +74,7 @@
     execl("/usr/bin/addr2line", "/usr/bin/addr2line", "-Cfe", m->fullname, 0);
     _exit(0);
   } else if (pid < 0) {
-    TsanPrintf("ThreadSanitizer: failed to fork symbolizer\n");
+    Printf("ThreadSanitizer: failed to fork symbolizer\n");
     Die();
   }
   internal_close(outfd[0]);
@@ -155,14 +155,14 @@
   char addrstr[32];
   internal_snprintf(addrstr, sizeof(addrstr), "%p\n", (void*)offset);
   if (0 >= internal_write(m->out_fd, addrstr, internal_strlen(addrstr))) {
-    TsanPrintf("ThreadSanitizer: can't write from symbolizer (%d, %d)\n",
+    Printf("ThreadSanitizer: can't write from symbolizer (%d, %d)\n",
         m->out_fd, errno);
     Die();
   }
   InternalScopedBuffer<char> func(1024);
   ssize_t len = internal_read(m->inp_fd, func.data(), func.size() - 1);
   if (len <= 0) {
-    TsanPrintf("ThreadSanitizer: can't read from symbolizer (%d, %d)\n",
+    Printf("ThreadSanitizer: can't read from symbolizer (%d, %d)\n",
         m->inp_fd, errno);
     Die();
   }
diff --git a/lib/tsan/rtl/tsan_sync.cc b/lib/tsan/rtl/tsan_sync.cc
index 642d1b2..38ecc6e 100644
--- a/lib/tsan/rtl/tsan_sync.cc
+++ b/lib/tsan/rtl/tsan_sync.cc
@@ -17,9 +17,10 @@
 
 namespace __tsan {
 
-SyncVar::SyncVar(uptr addr)
+SyncVar::SyncVar(uptr addr, u64 uid)
   : mtx(MutexTypeSyncVar, StatMtxSyncVar)
   , addr(addr)
+  , uid(uid)
   , owner_tid(kInvalidTid)
   , last_lock()
   , recursion()
@@ -47,8 +48,17 @@
   }
 }
 
+SyncVar* SyncTab::GetOrCreateAndLock(ThreadState *thr, uptr pc,
+                                     uptr addr, bool write_lock) {
+  return GetAndLock(thr, pc, addr, write_lock, true);  // create == true.
+}
+
+SyncVar* SyncTab::GetIfExistsAndLock(uptr addr, bool write_lock) {
+  return GetAndLock(0, 0, addr, write_lock, false);  // Never creates.
+}
+
 SyncVar* SyncTab::GetAndLock(ThreadState *thr, uptr pc,
-                             uptr addr, bool write_lock) {
+                             uptr addr, bool write_lock, bool create) {
 #ifndef TSAN_GO
   if (PrimaryAllocator::PointerIsMine((void*)addr)) {
     MBlock *b = user_mblock(thr, (void*)addr);
@@ -59,9 +69,12 @@
         break;
     }
     if (res == 0) {
+      if (!create)
+        return 0;
       StatInc(thr, StatSyncCreated);
       void *mem = internal_alloc(MBlockSync, sizeof(SyncVar));
-      res = new(mem) SyncVar(addr);
+      const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
+      res = new(mem) SyncVar(addr, uid);
       res->creation_stack.ObtainCurrent(thr, pc);
       res->next = b->head;
       b->head = res;
@@ -87,6 +100,8 @@
       }
     }
   }
+  if (!create)
+    return 0;
   {
     Lock l(&p->mtx);
     SyncVar *res = p->val;
@@ -97,7 +112,8 @@
     if (res == 0) {
       StatInc(thr, StatSyncCreated);
       void *mem = internal_alloc(MBlockSync, sizeof(SyncVar));
-      res = new(mem) SyncVar(addr);
+      const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
+      res = new(mem) SyncVar(addr, uid);
 #ifndef TSAN_GO
       res->creation_stack.ObtainCurrent(thr, pc);
 #endif
diff --git a/lib/tsan/rtl/tsan_sync.h b/lib/tsan/rtl/tsan_sync.h
index 89de81d..34ea55b 100644
--- a/lib/tsan/rtl/tsan_sync.h
+++ b/lib/tsan/rtl/tsan_sync.h
@@ -50,12 +50,13 @@
 };
 
 struct SyncVar {
-  explicit SyncVar(uptr addr);
+  explicit SyncVar(uptr addr, u64 uid);
 
   static const int kInvalidTid = -1;
 
   Mutex mtx;
   const uptr addr;
+  const u64 uid;  // Globally unique id.
   SyncClock clock;
   SyncClock read_clock;  // Used for rw mutexes only.
   StackTrace creation_stack;
@@ -69,6 +70,18 @@
   SyncVar *next;  // In SyncTab hashtable.
 
   uptr GetMemoryConsumption();
+  u64 GetId() const {
+    // Bits 0..46: addr; bits 47..60: low 14 bits of uid; bits 61..63: zero.
+    return GetLsb((u64)addr | (uid << 47), 61);
+  }
+  bool CheckId(u64 uid) const {
+    CHECK_EQ(uid, GetLsb(uid, 14));  // Argument must already fit in 14 bits.
+    return GetLsb(this->uid, 14) == uid;  // Match on the low 14 bits only.
+  }
+  static uptr SplitId(u64 id, u64 *uid) {
+    *uid = id >> 47;  // Bits 47 and up: the uid part of a GetId() value.
+    return (uptr)GetLsb(id, 47);  // Low 47 bits: the address part.
+  }
 };
 
 class SyncTab {
@@ -76,9 +89,9 @@
   SyncTab();
   ~SyncTab();
 
-  // If the SyncVar does not exist yet, it is created.
-  SyncVar* GetAndLock(ThreadState *thr, uptr pc,
-                      uptr addr, bool write_lock);
+  SyncVar* GetOrCreateAndLock(ThreadState *thr, uptr pc,
+                              uptr addr, bool write_lock);
+  SyncVar* GetIfExistsAndLock(uptr addr, bool write_lock);
 
   // If the SyncVar does not exist, returns 0.
   SyncVar* GetAndRemove(ThreadState *thr, uptr pc, uptr addr);
@@ -96,9 +109,13 @@
   // FIXME: Implement something more sane.
   static const int kPartCount = 1009;
   Part tab_[kPartCount];
+  atomic_uint64_t uid_gen_;
 
   int PartIdx(uptr addr);
 
+  SyncVar* GetAndLock(ThreadState *thr, uptr pc,
+                      uptr addr, bool write_lock, bool create);
+
   SyncTab(const SyncTab&);  // Not implemented.
   void operator = (const SyncTab&);  // Not implemented.
 };
diff --git a/lib/tsan/rtl/tsan_trace.h b/lib/tsan/rtl/tsan_trace.h
index fd96482..7df7160 100644
--- a/lib/tsan/rtl/tsan_trace.h
+++ b/lib/tsan/rtl/tsan_trace.h
@@ -16,15 +16,13 @@
 #include "tsan_defs.h"
 #include "tsan_mutex.h"
 #include "tsan_sync.h"
+#include "tsan_mutexset.h"
 
 namespace __tsan {
 
-#ifndef TSAN_HISTORY_SIZE  // in kibitraces
-#define TSAN_HISTORY_SIZE 128
-#endif
-
-const int kTracePartSize = 16 * 1024;
-const int kTraceParts = TSAN_HISTORY_SIZE * 1024 / kTracePartSize;
+const int kTracePartSizeBits = 14;
+const int kTracePartSize = 1 << kTracePartSizeBits;
+const int kTraceParts = 4 * 1024 * 1024 / kTracePartSize;
 const int kTraceSize = kTracePartSize * kTraceParts;
 
 // Must fit into 3 bits.
@@ -46,6 +44,7 @@
 struct TraceHeader {
   StackTrace stack0;  // Start stack for the trace.
   u64        epoch0;  // Start epoch for the trace.
+  MutexSet   mset0;
 #ifndef TSAN_GO
   uptr       stack0buf[kTraceStackSize];
 #endif
@@ -61,7 +60,6 @@
 };
 
 struct Trace {
-  Event events[kTraceSize];
   TraceHeader headers[kTraceParts];
   Mutex mtx;
 
diff --git a/lib/tsan/rtl/tsan_update_shadow_word_inl.h b/lib/tsan/rtl/tsan_update_shadow_word_inl.h
index c7864ce..2c43555 100644
--- a/lib/tsan/rtl/tsan_update_shadow_word_inl.h
+++ b/lib/tsan/rtl/tsan_update_shadow_word_inl.h
@@ -34,7 +34,7 @@
     if (Shadow::TidsAreEqual(old, cur)) {
       StatInc(thr, StatShadowSameThread);
       if (OldIsInSameSynchEpoch(old, thr)) {
-        if (OldIsRWStronger(old, kAccessIsWrite)) {
+        if (OldIsRWNotWeaker(old, kAccessIsWrite)) {
           // found a slot that holds effectively the same info
           // (that is, same tid, same sync epoch and same size)
           StatInc(thr, StatMopSame);
@@ -43,7 +43,7 @@
         StoreIfNotYetStored(sp, &store_word);
         break;
       }
-      if (OldIsRWWeaker(old, kAccessIsWrite))
+      if (OldIsRWWeakerOrEqual(old, kAccessIsWrite))
         StoreIfNotYetStored(sp, &store_word);
       break;
     }
diff --git a/lib/tsan/tests/rtl/tsan_string.cc b/lib/tsan/tests/rtl/tsan_string.cc
index 75adc6c..c402f7c 100644
--- a/lib/tsan/tests/rtl/tsan_string.cc
+++ b/lib/tsan/tests/rtl/tsan_string.cc
@@ -46,6 +46,9 @@
   t2.Memcpy(data, data2, 10, true);
 }
 
+// The test fails with TSAN_SHADOW_COUNT=2,
+// because the old racy access is evicted.
+#if defined(TSAN_SHADOW_COUNT) && TSAN_SHADOW_COUNT >= 4
 TEST(ThreadSanitizer, MemcpyRace2) {
   char *data = new char[10];
   char *data1 = new char[10];
@@ -54,6 +57,7 @@
   t1.Memcpy(data+5, data1, 1);
   t2.Memcpy(data+3, data2, 4, true);
 }
+#endif
 
 TEST(ThreadSanitizer, MemcpyRace3) {
   char *data = new char[10];
diff --git a/lib/tsan/tests/unit/CMakeLists.txt b/lib/tsan/tests/unit/CMakeLists.txt
index 5608e24..52ebdb8 100644
--- a/lib/tsan/tests/unit/CMakeLists.txt
+++ b/lib/tsan/tests/unit/CMakeLists.txt
@@ -4,7 +4,6 @@
   tsan_mman_test.cc
   tsan_mutex_test.cc
   tsan_platform_test.cc
-  tsan_printf_test.cc
   tsan_shadow_test.cc
   tsan_stack_test.cc
   tsan_suppressions_test.cc
diff --git a/lib/tsan/tests/unit/tsan_mutexset_test.cc b/lib/tsan/tests/unit/tsan_mutexset_test.cc
new file mode 100644
index 0000000..da1ae2e
--- /dev/null
+++ b/lib/tsan/tests/unit/tsan_mutexset_test.cc
@@ -0,0 +1,126 @@
+//===-- tsan_mutexset_test.cc ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_mutexset.h"
+#include "gtest/gtest.h"
+
+namespace __tsan {
+
+static void Expect(const MutexSet &mset, uptr i, u64 id, bool write, u64 epoch,
+                   int count) {
+  const MutexSet::Desc desc = mset.Get(i);  // Descriptor stored at index i.
+  EXPECT_EQ(id, desc.id);
+  EXPECT_EQ(write, desc.write);
+  EXPECT_EQ(epoch, desc.epoch);
+  EXPECT_EQ(count, desc.count);
+}
+
+TEST(MutexSet, Basic) {
+  MutexSet locks;
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(0));
+  // One mutex: present after Add, gone after the matching Del.
+  locks.Add(1, true, 2);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(1));
+  Expect(locks, 0, 1, true, 2, 1);
+  locks.Del(1, true);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(0));
+  // Two mutexes (write and non-write) keep insertion order, then go one by one.
+  locks.Add(3, true, 4);
+  locks.Add(5, false, 6);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(2));
+  Expect(locks, 0, 3, true, 4, 1);
+  Expect(locks, 1, 5, false, 6, 1);
+  locks.Del(3, true);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(1));
+  locks.Del(5, false);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(0));
+}
+
+TEST(MutexSet, DoubleAdd) {
+  MutexSet locks;
+  locks.Add(1, true, 2);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(1));
+  Expect(locks, 0, 1, true, 2, 1);
+  // Adding the same mutex again bumps its count instead of the size.
+  locks.Add(1, true, 2);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(1));
+  Expect(locks, 0, 1, true, 2, 2);
+  // The first Del only undoes one of the two Adds.
+  locks.Del(1, true);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(1));
+  Expect(locks, 0, 1, true, 2, 1);
+  // The second Del drops the entry altogether.
+  locks.Del(1, true);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(0));
+}
+
+TEST(MutexSet, DoubleDel) {
+  MutexSet locks;
+  locks.Add(1, true, 2);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(1));
+  locks.Del(1, true);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(0));
+  locks.Del(1, true);  // Del of an absent mutex must leave the set empty.
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(0));
+}
+
+TEST(MutexSet, Remove) {
+  MutexSet locks;
+  locks.Add(1, true, 2);
+  locks.Add(1, true, 2);
+  locks.Add(3, true, 4);
+  locks.Add(3, true, 4);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(2));
+  // Remove drops mutex 1 entirely despite its count of 2; mutex 3 is untouched.
+  locks.Remove(1);
+  EXPECT_EQ(locks.Size(), static_cast<uptr>(1));
+  Expect(locks, 0, 3, true, 4, 2);
+}
+
+TEST(MutexSet, Full) {
+  MutexSet locks;
+  for (uptr id = 0; id < MutexSet::kMaxSize; ++id) {
+    locks.Add(id, true, id + 1);
+  }
+  EXPECT_EQ(locks.Size(), MutexSet::kMaxSize);
+  for (uptr slot = 0; slot < MutexSet::kMaxSize; ++slot) {
+    Expect(locks, slot, slot, true, slot + 1, 1);
+  }
+  // Re-adding every mutex of a full set must only raise the counts to 2.
+  for (uptr id = 0; id < MutexSet::kMaxSize; ++id) {
+    locks.Add(id, true, id + 1);
+  }
+  EXPECT_EQ(locks.Size(), MutexSet::kMaxSize);
+  for (uptr slot = 0; slot < MutexSet::kMaxSize; ++slot) {
+    Expect(locks, slot, slot, true, slot + 1, 2);
+  }
+}
+
+TEST(MutexSet, Overflow) {
+  MutexSet mset;
+  for (uptr i = 0; i < MutexSet::kMaxSize; i++) {
+    mset.Add(i, true, i + 1);  // Added twice, so every entry has count 2.
+    mset.Add(i, true, i + 1);
+  }
+  mset.Add(100, true, 200);  // One more mutex than the set holds (id >= size).
+  EXPECT_EQ(mset.Size(), MutexSet::kMaxSize);
+  for (uptr i = 0; i < MutexSet::kMaxSize; i++) {
+    if (i == 0)  // Slot 0 now holds the entry that used to be last.
+      Expect(mset, i, MutexSet::kMaxSize - 1, true, MutexSet::kMaxSize, 2);
+    else if (i == MutexSet::kMaxSize - 1)  // The new mutex takes the last slot.
+      Expect(mset, i, 100, true, 200, 1);
+    else
+      Expect(mset, i, i, true, i + 1, 2);
+  }
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/tests/unit/tsan_platform_test.cc b/lib/tsan/tests/unit/tsan_platform_test.cc
index 64c4499..b43dbb4 100644
--- a/lib/tsan/tests/unit/tsan_platform_test.cc
+++ b/lib/tsan/tests/unit/tsan_platform_test.cc
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 #include "sanitizer_common/sanitizer_libc.h"
 #include "tsan_platform.h"
+#include "tsan_rtl.h"
 #include "gtest/gtest.h"
 
 namespace __tsan {
diff --git a/lib/tsan/tests/unit/tsan_shadow_test.cc b/lib/tsan/tests/unit/tsan_shadow_test.cc
index 41f9121..fa9c982 100644
--- a/lib/tsan/tests/unit/tsan_shadow_test.cc
+++ b/lib/tsan/tests/unit/tsan_shadow_test.cc
@@ -11,10 +11,41 @@
 //
 //===----------------------------------------------------------------------===//
 #include "tsan_platform.h"
+#include "tsan_rtl.h"
 #include "gtest/gtest.h"
 
 namespace __tsan {
 
+TEST(Shadow, FastState) {
+  Shadow shadow(FastState(11, 22));
+  EXPECT_EQ(shadow.tid(), static_cast<u64>(11));
+  EXPECT_EQ(shadow.epoch(), static_cast<u64>(22));
+  EXPECT_EQ(shadow.GetIgnoreBit(), false);
+  EXPECT_EQ(shadow.GetFreedAndReset(), false);
+  EXPECT_EQ(shadow.GetHistorySize(), 0);
+  EXPECT_EQ(shadow.addr0(), static_cast<u64>(0));
+  EXPECT_EQ(shadow.size(), static_cast<u64>(1));
+  EXPECT_EQ(shadow.is_write(), false);
+  // Each IncrementEpoch() call advances the epoch by exactly one.
+  shadow.IncrementEpoch();
+  EXPECT_EQ(shadow.epoch(), static_cast<u64>(23));
+  shadow.IncrementEpoch();
+  EXPECT_EQ(shadow.epoch(), static_cast<u64>(24));
+  // The ignore bit can be set and cleared again.
+  shadow.SetIgnoreBit();
+  EXPECT_EQ(shadow.GetIgnoreBit(), true);
+  shadow.ClearIgnoreBit();
+  EXPECT_EQ(shadow.GetIgnoreBit(), false);
+  // Every history size in [0, 8) must round-trip; clearing resets it to 0.
+  for (int hist = 0; hist < 8; ++hist) {
+    shadow.SetHistorySize(hist);
+    EXPECT_EQ(shadow.GetHistorySize(), hist);
+  }
+  shadow.SetHistorySize(2);
+  shadow.ClearHistorySize();
+  EXPECT_EQ(shadow.GetHistorySize(), 0);
+}
+
 TEST(Shadow, Mapping) {
   static int global;
   int stack;
diff --git a/lib/tsan/tests/unit/tsan_sync_test.cc b/lib/tsan/tests/unit/tsan_sync_test.cc
index b7605a5..dddf0b2 100644
--- a/lib/tsan/tests/unit/tsan_sync_test.cc
+++ b/lib/tsan/tests/unit/tsan_sync_test.cc
@@ -36,7 +36,7 @@
     uintptr_t addr = rand_r(&seed) % (kRange - 1) + 1;
     if (rand_r(&seed) % 2) {
       // Get or add.
-      SyncVar *v = tab.GetAndLock(thr, pc, addr, true);
+      SyncVar *v = tab.GetOrCreateAndLock(thr, pc, addr, true);
       EXPECT_TRUE(golden[addr] == 0 || golden[addr] == v);
       EXPECT_EQ(v->addr, addr);
       golden[addr] = v;
diff --git a/lib/ubsan/CMakeLists.txt b/lib/ubsan/CMakeLists.txt
index 616e065..b16983d 100644
--- a/lib/ubsan/CMakeLists.txt
+++ b/lib/ubsan/CMakeLists.txt
@@ -14,21 +14,41 @@
 
 set(UBSAN_RUNTIME_LIBRARIES)
 
-if(CAN_TARGET_X86_64)
-  add_library(clang_rt.ubsan-x86_64 STATIC ${UBSAN_SOURCES})
-  set_target_compile_flags(clang_rt.ubsan-x86_64
-    ${UBSAN_CFLAGS} ${TARGET_X86_64_CFLAGS}
+if(APPLE)
+  # Build universal binary on APPLE.
+  add_library(clang_rt.ubsan_osx STATIC
+    ${UBSAN_SOURCES}
+    $<TARGET_OBJECTS:RTSanitizerCommon.osx>
     )
-  list(APPEND UBSAN_RUNTIME_LIBRARIES clang_rt.ubsan-x86_64)
+  set_target_compile_flags(clang_rt.ubsan_osx ${UBSAN_CFLAGS})
+  filter_available_targets(UBSAN_TARGETS x86_64 i386)
+  set_target_properties(clang_rt.ubsan_osx PROPERTIES
+    OSX_ARCHITECTURES "${UBSAN_TARGETS}")
+  list(APPEND UBSAN_RUNTIME_LIBRARIES clang_rt.ubsan_osx)
+else()
+  # Build separate libraries for each target.
+  if(CAN_TARGET_X86_64)
+    add_library(clang_rt.ubsan-x86_64 STATIC
+      ${UBSAN_SOURCES}
+      $<TARGET_OBJECTS:RTSanitizerCommon.x86_64>
+      )
+    set_target_compile_flags(clang_rt.ubsan-x86_64
+      ${UBSAN_CFLAGS} ${TARGET_X86_64_CFLAGS}
+      )
+    list(APPEND UBSAN_RUNTIME_LIBRARIES clang_rt.ubsan-x86_64)
+  endif()
+  if(CAN_TARGET_I386)
+    add_library(clang_rt.ubsan-i386 STATIC
+      ${UBSAN_SOURCES}
+      $<TARGET_OBJECTS:RTSanitizerCommon.i386>
+      )
+    set_target_compile_flags(clang_rt.ubsan-i386
+      ${UBSAN_CFLAGS} ${TARGET_I386_CFLAGS}
+      )
+    list(APPEND UBSAN_RUNTIME_LIBRARIES clang_rt.ubsan-i386)
+  endif()
 endif()
 
-if(CAN_TARGET_I386)
-  add_library(clang_rt.ubsan-i386 STATIC ${UBSAN_SOURCES})
-  set_target_compile_flags(clang_rt.ubsan-i386
-    ${UBSAN_CFLAGS} ${TARGET_I386_CFLAGS}
-    )
-  list(APPEND UBSAN_RUNTIME_LIBRARIES clang_rt.ubsan-i386)
-endif()
 
 set_property(TARGET ${UBSAN_RUNTIME_LIBRARIES} APPEND PROPERTY
   COMPILE_DEFINITIONS ${UBSAN_COMMON_DEFINITIONS})
diff --git a/lib/ubsan/Makefile.mk b/lib/ubsan/Makefile.mk
new file mode 100644
index 0000000..5702e0e
--- /dev/null
+++ b/lib/ubsan/Makefile.mk
@@ -0,0 +1,23 @@
+#===- lib/ubsan/Makefile.mk ---------------------------------*- Makefile -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := ubsan
+SubDirs :=
+
+Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
+ObjNames := $(Sources:%.cc=%.o)
+
+Implementation := Generic
+
+# All headers here and in ../sanitizer_common. FIXME: use automatic dependencies?
+Dependencies := $(wildcard $(Dir)/*.h)
+Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)
+
+# Convenience variable for all the ubsan functions: Sources minus the .cc suffix.
+UbsanFunctions := $(Sources:%.cc=%)
diff --git a/lib/ubsan/lit_tests/Float/cast-overflow.cpp b/lib/ubsan/lit_tests/Float/cast-overflow.cpp
index d04f38c..63410dc 100644
--- a/lib/ubsan/lit_tests/Float/cast-overflow.cpp
+++ b/lib/ubsan/lit_tests/Float/cast-overflow.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang -fcatch-undefined-behavior %s -o %t
+// RUN: %clang -fsanitize=float-cast-overflow %s -o %t
 // RUN: %t _
 // RUN: %t 0 2>&1 | FileCheck %s --check-prefix=CHECK-0
 // RUN: %t 1 2>&1 | FileCheck %s --check-prefix=CHECK-1
@@ -14,6 +14,7 @@
 // This test assumes float and double are IEEE-754 single- and double-precision.
 
 #include <stdint.h>
+#include <stdio.h>
 #include <string.h>
 
 float Inf;
@@ -30,8 +31,10 @@
   float MaxFloatRepresentableAsUInt = 0xffffff00u;
   (unsigned int)MaxFloatRepresentableAsUInt; // ok
 
+#ifdef __SIZEOF_INT128__
   unsigned __int128 FloatMaxAsUInt128 = -((unsigned __int128)1 << 104);
   (void)(float)FloatMaxAsUInt128; // ok
+#endif
 
   // Build a '+Inf'.
   char InfVal[] = { 0x00, 0x00, 0x80, 0x7f };
@@ -50,41 +53,46 @@
   case '0':
     // Note that values between 0x7ffffe00 and 0x80000000 may or may not
     // successfully round-trip, depending on the rounding mode.
-    // CHECK-0: fatal error: value 2.14748{{.*}} is outside the range of representable values of type 'int'
+    // CHECK-0: runtime error: value 2.14748{{.*}} is outside the range of representable values of type 'int'
     return MaxFloatRepresentableAsInt + 0x80;
   case '1':
-    // CHECK-1: fatal error: value -2.14748{{.*}} is outside the range of representable values of type 'int'
+    // CHECK-1: runtime error: value -2.14748{{.*}} is outside the range of representable values of type 'int'
     return MinFloatRepresentableAsInt - 0x100;
   case '2':
-    // CHECK-2: fatal error: value -0.001 is outside the range of representable values of type 'unsigned int'
+    // CHECK-2: runtime error: value -0.001 is outside the range of representable values of type 'unsigned int'
     return (unsigned)-0.001;
   case '3':
-    // CHECK-3: fatal error: value 4.2949{{.*}} is outside the range of representable values of type 'unsigned int'
+    // CHECK-3: runtime error: value 4.2949{{.*}} is outside the range of representable values of type 'unsigned int'
     return (unsigned)(MaxFloatRepresentableAsUInt + 0x100);
 
   case '4':
-    // CHECK-4: fatal error: value {{.*}} is outside the range of representable values of type 'int'
+    // CHECK-4: runtime error: value {{.*}} is outside the range of representable values of type 'int'
     return Inf;
   case '5':
-    // CHECK-5: fatal error: value {{.*}} is outside the range of representable values of type 'int'
+    // CHECK-5: runtime error: value {{.*}} is outside the range of representable values of type 'int'
     return NaN;
 
     // Integer -> floating point overflow.
   case '6':
-    // CHECK-6: fatal error: value 0xffffff00000000000000000000000001 is outside the range of representable values of type 'float'
+    // CHECK-6: {{runtime error: value 0xffffff00000000000000000000000001 is outside the range of representable values of type 'float'|__int128 not supported}}
+#ifdef __SIZEOF_INT128__
     return (float)(FloatMaxAsUInt128 + 1);
+#else
+    puts("__int128 not supported");
+    return 0;
+#endif
   // FIXME: The backend cannot lower __fp16 operations on x86 yet.
   //case '7':
   //  (__fp16)65504; // ok
-  //  // CHECK-7: fatal error: value 65505 is outside the range of representable values of type '__fp16'
+  //  // CHECK-7: runtime error: value 65505 is outside the range of representable values of type '__fp16'
   //  return (__fp16)65505;
 
     // Floating point -> floating point overflow.
   case '8':
-    // CHECK-8: fatal error: value 1e+39 is outside the range of representable values of type 'float'
+    // CHECK-8: runtime error: value 1e+39 is outside the range of representable values of type 'float'
     return (float)1e39;
   case '9':
-    // CHECK-9: fatal error: value {{.*}} is outside the range of representable values of type 'double'
+    // CHECK-9: runtime error: value {{.*}} is outside the range of representable values of type 'double'
     return (double)Inf;
   }
 }
diff --git a/lib/ubsan/lit_tests/Integer/add-overflow.cpp b/lib/ubsan/lit_tests/Integer/add-overflow.cpp
index 781ccf3..4477638 100644
--- a/lib/ubsan/lit_tests/Integer/add-overflow.cpp
+++ b/lib/ubsan/lit_tests/Integer/add-overflow.cpp
@@ -1,8 +1,9 @@
-// RUN: %clang -DADD_I32 -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ADD_I32
-// RUN: %clang -DADD_I64 -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ADD_I64
-// RUN: %clang -DADD_I128 -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ADD_I128
+// RUN: %clang -DADD_I32 -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ADD_I32
+// RUN: %clang -DADD_I64 -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ADD_I64
+// RUN: %clang -DADD_I128 -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ADD_I128
 
 #include <stdint.h>
+#include <stdio.h>
 
 int main() {
   // These promote to 'int'.
@@ -12,16 +13,20 @@
 #ifdef ADD_I32
   int32_t k = 0x12345678;
   k += 0x789abcde;
-  // CHECK-ADD_I32: add-overflow.cpp:14:5: fatal error: signed integer overflow: 305419896 + 2023406814 cannot be represented in type 'int32_t' (aka 'int')
+  // CHECK-ADD_I32: add-overflow.cpp:[[@LINE-1]]:5: runtime error: signed integer overflow: 305419896 + 2023406814 cannot be represented in type 'int32_t' (aka 'int')
 #endif
 
 #ifdef ADD_I64
   (void)(int64_t(8000000000000000000ll) + int64_t(2000000000000000000ll));
-  // CHECK-ADD_I64: 8000000000000000000 + 2000000000000000000 cannot be represented in type 'long'
+  // CHECK-ADD_I64: 8000000000000000000 + 2000000000000000000 cannot be represented in type '{{long( long)?}}'
 #endif
 
 #ifdef ADD_I128
+# ifdef __SIZEOF_INT128__
   (void)((__int128_t(1) << 126) + (__int128_t(1) << 126));
-  // CHECK-ADD_I128: 0x40000000000000000000000000000000 + 0x40000000000000000000000000000000 cannot be represented in type '__int128'
+# else
+  puts("__int128 not supported");
+# endif
+  // CHECK-ADD_I128: {{0x40000000000000000000000000000000 \+ 0x40000000000000000000000000000000 cannot be represented in type '__int128'|__int128 not supported}}
 #endif
 }
diff --git a/lib/ubsan/lit_tests/Integer/div-overflow.cpp b/lib/ubsan/lit_tests/Integer/div-overflow.cpp
index fb0e7d1..dd82427 100644
--- a/lib/ubsan/lit_tests/Integer/div-overflow.cpp
+++ b/lib/ubsan/lit_tests/Integer/div-overflow.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s
 
 #include <stdint.h>
 
 int main() {
   unsigned(0x80000000) / -1;
 
-  // CHECK: div-overflow.cpp:9:23: fatal error: division of -2147483648 by -1 cannot be represented in type 'int'
+  // CHECK: div-overflow.cpp:9:23: runtime error: division of -2147483648 by -1 cannot be represented in type 'int'
   int32_t(0x80000000) / -1;
 }
diff --git a/lib/ubsan/lit_tests/Integer/div-zero.cpp b/lib/ubsan/lit_tests/Integer/div-zero.cpp
index 0c1c3f8..b2a8395 100644
--- a/lib/ubsan/lit_tests/Integer/div-zero.cpp
+++ b/lib/ubsan/lit_tests/Integer/div-zero.cpp
@@ -1,9 +1,15 @@
-// RUN: %clang -fcatch-undefined-behavior -DDIVIDEND=0 %s -o %t && %t 2>&1 | FileCheck %s
-// RUN: %clang -fcatch-undefined-behavior -DDIVIDEND=1U %s -o %t && %t 2>&1 | FileCheck %s
-// RUN: %clang -fcatch-undefined-behavior -DDIVIDEND=1.5 %s -o %t && %t 2>&1 | FileCheck %s
-// RUN: %clang -fcatch-undefined-behavior -DDIVIDEND='__int128(123)' %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=integer-divide-by-zero -DDIVIDEND=0 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=integer-divide-by-zero -DDIVIDEND=1U %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=float-divide-by-zero -DDIVIDEND=1.5 %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=integer-divide-by-zero -DDIVIDEND='intmax(123)' %s -o %t && %t 2>&1 | FileCheck %s
+
+#ifdef __SIZEOF_INT128__
+typedef __int128 intmax;
+#else
+typedef long long intmax;
+#endif
 
 int main() {
-  // CHECK: div-zero.cpp:8:12: fatal error: division by zero
+  // CHECK: div-zero.cpp:[[@LINE+1]]:12: runtime error: division by zero
   DIVIDEND / 0;
 }
diff --git a/lib/ubsan/lit_tests/Integer/incdec-overflow.cpp b/lib/ubsan/lit_tests/Integer/incdec-overflow.cpp
index 8c8800b..48b68b6 100644
--- a/lib/ubsan/lit_tests/Integer/incdec-overflow.cpp
+++ b/lib/ubsan/lit_tests/Integer/incdec-overflow.cpp
@@ -1,7 +1,7 @@
-// RUN: %clang -DOP=n++ -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s
-// RUN: %clang -DOP=++n -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s
-// RUN: %clang -DOP=m-- -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s
-// RUN: %clang -DOP=--m -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -DOP=n++ -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -DOP=++n -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -DOP=m-- -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -DOP=--m -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s
 
 #include <stdint.h>
 
@@ -10,7 +10,7 @@
   n++;
   n++;
   int m = -n - 1;
-  // CHECK: incdec-overflow.cpp:15:3: fatal error: signed integer overflow: [[MINUS:-?]]214748364
+  // CHECK: incdec-overflow.cpp:15:3: runtime error: signed integer overflow: [[MINUS:-?]]214748364
   // CHECK: + [[MINUS]]1 cannot be represented in type 'int'
   OP;
 }
diff --git a/lib/ubsan/lit_tests/Integer/mul-overflow.cpp b/lib/ubsan/lit_tests/Integer/mul-overflow.cpp
index 1ba968d..8d1e70d 100644
--- a/lib/ubsan/lit_tests/Integer/mul-overflow.cpp
+++ b/lib/ubsan/lit_tests/Integer/mul-overflow.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s
 
 #include <stdint.h>
 
@@ -9,6 +9,6 @@
   (void)(uint16_t(0xffff) * int16_t(0x7fff));
   (void)(uint16_t(0xffff) * uint16_t(0x8000));
 
-  // CHECK: mul-overflow.cpp:13:27: fatal error: signed integer overflow: 65535 * 32769 cannot be represented in type 'int'
+  // CHECK: mul-overflow.cpp:13:27: runtime error: signed integer overflow: 65535 * 32769 cannot be represented in type 'int'
   (void)(uint16_t(0xffff) * uint16_t(0x8001));
 }
diff --git a/lib/ubsan/lit_tests/Integer/negate-overflow.cpp b/lib/ubsan/lit_tests/Integer/negate-overflow.cpp
index 58f1b48..e3beb6b 100644
--- a/lib/ubsan/lit_tests/Integer/negate-overflow.cpp
+++ b/lib/ubsan/lit_tests/Integer/negate-overflow.cpp
@@ -1,7 +1,7 @@
-// RUN: %clang -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s
 
 int main() {
   -unsigned(-0x7fffffff - 1); // ok
-  // CHECK: negate-overflow.cpp:6:10: fatal error: negation of -2147483648 cannot be represented in type 'int'; cast to an unsigned type to negate this value to itself
+  // CHECK: negate-overflow.cpp:6:10: runtime error: negation of -2147483648 cannot be represented in type 'int'; cast to an unsigned type to negate this value to itself
   return -(-0x7fffffff - 1);
 }
diff --git a/lib/ubsan/lit_tests/Integer/no-recover.cpp b/lib/ubsan/lit_tests/Integer/no-recover.cpp
new file mode 100644
index 0000000..08324bd
--- /dev/null
+++ b/lib/ubsan/lit_tests/Integer/no-recover.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang -fsanitize=unsigned-integer-overflow -Xclang -fsanitize-recover %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=RECOVER
+// RUN: %clang -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ABORT
+
+#include <stdint.h>
+
+int main() {
+  // These promote to 'int'.
+  (void)(uint8_t(0xff) + uint8_t(0xff));
+  (void)(uint16_t(0xf0fff) + uint16_t(0x0fff));
+  // RECOVER-NOT: runtime error
+  // ABORT-NOT: runtime error
+
+  uint32_t k = 0x87654321;
+  k += 0xedcba987;
+  // RECOVER: no-recover.cpp:14:5: runtime error: unsigned integer overflow: 2271560481 + 3989547399 cannot be represented in type 'uint32_t' (aka 'unsigned int')
+  // ABORT: no-recover.cpp:14:5: runtime error: unsigned integer overflow: 2271560481 + 3989547399 cannot be represented in type 'uint32_t' (aka 'unsigned int')
+
+  (void)(uint64_t(10000000000000000000ull) + uint64_t(9000000000000000000ull));
+  // RECOVER: 10000000000000000000 + 9000000000000000000 cannot be represented in type 'unsigned {{long( long)?}}'
+  // ABORT-NOT: runtime error
+}
diff --git a/lib/ubsan/lit_tests/Integer/shift.cpp b/lib/ubsan/lit_tests/Integer/shift.cpp
index 33a1531..19101c5 100644
--- a/lib/ubsan/lit_tests/Integer/shift.cpp
+++ b/lib/ubsan/lit_tests/Integer/shift.cpp
@@ -1,13 +1,13 @@
-// RUN: %clang -DLSH_OVERFLOW -DOP='<<' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=LSH_OVERFLOW
-// RUN: %clang -DLSH_OVERFLOW -DOP='<<=' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=LSH_OVERFLOW
-// RUN: %clang -DTOO_LOW -DOP='<<' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_LOW
-// RUN: %clang -DTOO_LOW -DOP='>>' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_LOW
-// RUN: %clang -DTOO_LOW -DOP='<<=' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_LOW
-// RUN: %clang -DTOO_LOW -DOP='>>=' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_LOW
-// RUN: %clang -DTOO_HIGH -DOP='<<' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_HIGH
-// RUN: %clang -DTOO_HIGH -DOP='>>' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_HIGH
-// RUN: %clang -DTOO_HIGH -DOP='<<=' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_HIGH
-// RUN: %clang -DTOO_HIGH -DOP='>>=' -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_HIGH
+// RUN: %clang -DLSH_OVERFLOW -DOP='<<' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=LSH_OVERFLOW
+// RUN: %clang -DLSH_OVERFLOW -DOP='<<=' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=LSH_OVERFLOW
+// RUN: %clang -DTOO_LOW -DOP='<<' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_LOW
+// RUN: %clang -DTOO_LOW -DOP='>>' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_LOW
+// RUN: %clang -DTOO_LOW -DOP='<<=' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_LOW
+// RUN: %clang -DTOO_LOW -DOP='>>=' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_LOW
+// RUN: %clang -DTOO_HIGH -DOP='<<' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_HIGH
+// RUN: %clang -DTOO_HIGH -DOP='>>' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_HIGH
+// RUN: %clang -DTOO_HIGH -DOP='<<=' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_HIGH
+// RUN: %clang -DTOO_HIGH -DOP='>>=' -fsanitize=shift %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=TOO_HIGH
 
 #include <stdint.h>
 
@@ -20,18 +20,18 @@
   b <<= 1; // still ok, unsigned
 
 #ifdef LSH_OVERFLOW
-  // CHECK-LSH_OVERFLOW: shift.cpp:24:5: fatal error: left shift of negative value -2147483648
+  // CHECK-LSH_OVERFLOW: shift.cpp:24:5: runtime error: left shift of negative value -2147483648
   a OP 1;
 #endif
 
 #ifdef TOO_LOW
-  // CHECK-TOO_LOW: shift.cpp:29:5: fatal error: shift exponent -3 is negative
+  // CHECK-TOO_LOW: shift.cpp:29:5: runtime error: shift exponent -3 is negative
   a OP (-3);
 #endif
 
 #ifdef TOO_HIGH
   a = 0;
-  // CHECK-TOO_HIGH: shift.cpp:35:5: fatal error: shift exponent 32 is too large for 32-bit type 'int'
+  // CHECK-TOO_HIGH: shift.cpp:35:5: runtime error: shift exponent 32 is too large for 32-bit type 'int'
   a OP 32;
 #endif
 }
diff --git a/lib/ubsan/lit_tests/Integer/sub-overflow.cpp b/lib/ubsan/lit_tests/Integer/sub-overflow.cpp
index 71925c9..b43a69b 100644
--- a/lib/ubsan/lit_tests/Integer/sub-overflow.cpp
+++ b/lib/ubsan/lit_tests/Integer/sub-overflow.cpp
@@ -1,8 +1,9 @@
-// RUN: %clang -DSUB_I32 -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=SUB_I32
-// RUN: %clang -DSUB_I64 -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=SUB_I64
-// RUN: %clang -DSUB_I128 -fcatch-undefined-behavior %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=SUB_I128
+// RUN: %clang -DSUB_I32 -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=SUB_I32
+// RUN: %clang -DSUB_I64 -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=SUB_I64
+// RUN: %clang -DSUB_I128 -fsanitize=signed-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=SUB_I128
 
 #include <stdint.h>
+#include <stdio.h>
 
 int main() {
   // These promote to 'int'.
@@ -11,16 +12,20 @@
 
 #ifdef SUB_I32
   (void)(int32_t(-2) - int32_t(0x7fffffff));
-  // CHECK-SUB_I32: sub-overflow.cpp:13:22: fatal error: signed integer overflow: -2 - 2147483647 cannot be represented in type 'int'
+  // CHECK-SUB_I32: sub-overflow.cpp:[[@LINE-1]]:22: runtime error: signed integer overflow: -2 - 2147483647 cannot be represented in type 'int'
 #endif
 
 #ifdef SUB_I64
   (void)(int64_t(-8000000000000000000ll) - int64_t(2000000000000000000ll));
-  // CHECK-SUB_I64: -8000000000000000000 - 2000000000000000000 cannot be represented in type 'long'
+  // CHECK-SUB_I64: -8000000000000000000 - 2000000000000000000 cannot be represented in type '{{long( long)?}}'
 #endif
 
 #ifdef SUB_I128
+# ifdef __SIZEOF_INT128__
   (void)(-(__int128_t(1) << 126) - (__int128_t(1) << 126) - 1);
-  // CHECK-SUB_I128: 0x80000000000000000000000000000000 - 1 cannot be represented in type '__int128'
+# else
+  puts("__int128 not supported");
+# endif
+  // CHECK-SUB_I128: {{0x80000000000000000000000000000000 - 1 cannot be represented in type '__int128'|__int128 not supported}}
 #endif
 }
diff --git a/lib/ubsan/lit_tests/Integer/uadd-overflow.cpp b/lib/ubsan/lit_tests/Integer/uadd-overflow.cpp
new file mode 100644
index 0000000..d7b43d0
--- /dev/null
+++ b/lib/ubsan/lit_tests/Integer/uadd-overflow.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang -DADD_I32 -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ADD_I32
+// RUN: %clang -DADD_I64 -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ADD_I64
+// RUN: %clang -DADD_I128 -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=ADD_I128
+
+#include <stdint.h>
+#include <stdio.h>
+
+int main() {
+  // These promote to 'int'.
+  (void)(uint8_t(0xff) + uint8_t(0xff));
+  (void)(uint16_t(0xf0fff) + uint16_t(0x0fff));
+
+#ifdef ADD_I32
+  uint32_t k = 0x87654321;
+  k += 0xedcba987;
+  // CHECK-ADD_I32: uadd-overflow.cpp:[[@LINE-1]]:5: runtime error: unsigned integer overflow: 2271560481 + 3989547399 cannot be represented in type 'uint32_t' (aka 'unsigned int')
+#endif
+
+#ifdef ADD_I64
+  (void)(uint64_t(10000000000000000000ull) + uint64_t(9000000000000000000ull));
+  // CHECK-ADD_I64: 10000000000000000000 + 9000000000000000000 cannot be represented in type 'unsigned {{long( long)?}}'
+#endif
+
+#ifdef ADD_I128
+# ifdef __SIZEOF_INT128__
+  (void)((__uint128_t(1) << 127) + (__uint128_t(1) << 127));
+# else
+  puts("__int128 not supported");
+# endif
+  // CHECK-ADD_I128: {{0x80000000000000000000000000000000 \+ 0x80000000000000000000000000000000 cannot be represented in type 'unsigned __int128'|__int128 not supported}}
+#endif
+}
diff --git a/lib/ubsan/lit_tests/Integer/uincdec-overflow.cpp b/lib/ubsan/lit_tests/Integer/uincdec-overflow.cpp
new file mode 100644
index 0000000..6b677ca
--- /dev/null
+++ b/lib/ubsan/lit_tests/Integer/uincdec-overflow.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang -DOP=n++ -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck --check-prefix=INC %s
+// RUN: %clang -DOP=++n -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck --check-prefix=INC %s
+// RUN: %clang -DOP=m-- -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck --check-prefix=DEC %s
+// RUN: %clang -DOP=--m -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck --check-prefix=DEC %s
+
+#include <stdint.h>
+
+int main() {
+  unsigned n = 0xfffffffd;
+  n++;
+  n++;
+  unsigned m = 0;
+  // CHECK-INC: uincdec-overflow.cpp:15:3: runtime error: unsigned integer overflow: 4294967295 + 1 cannot be represented in type 'unsigned int'
+  // CHECK-DEC: uincdec-overflow.cpp:15:3: runtime error: unsigned integer overflow: 0 - 1 cannot be represented in type 'unsigned int'
+  OP;
+}
diff --git a/lib/ubsan/lit_tests/Integer/umul-overflow.cpp b/lib/ubsan/lit_tests/Integer/umul-overflow.cpp
new file mode 100644
index 0000000..42cf3a7
--- /dev/null
+++ b/lib/ubsan/lit_tests/Integer/umul-overflow.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s
+
+#include <stdint.h>
+
+int main() {
+  // These promote to 'int'.
+  (void)(int8_t(-2) * int8_t(0x7f));
+  (void)(int16_t(0x7fff) * int16_t(0x7fff));
+  (void)(uint16_t(0xffff) * int16_t(0x7fff));
+  (void)(uint16_t(0xffff) * uint16_t(0x8000));
+
+  // Not an unsigned overflow
+  (void)(uint16_t(0xffff) * uint16_t(0x8001));
+
+  (void)(uint32_t(0xffffffff) * uint32_t(0x2));
+  // CHECK: umul-overflow.cpp:15:31: runtime error: unsigned integer overflow: 4294967295 * 2 cannot be represented in type 'unsigned int'
+
+  return 0;
+}
diff --git a/lib/ubsan/lit_tests/Integer/usub-overflow.cpp b/lib/ubsan/lit_tests/Integer/usub-overflow.cpp
new file mode 100644
index 0000000..357d662
--- /dev/null
+++ b/lib/ubsan/lit_tests/Integer/usub-overflow.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang -DSUB_I32 -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=SUB_I32
+// RUN: %clang -DSUB_I64 -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=SUB_I64
+// RUN: %clang -DSUB_I128 -fsanitize=unsigned-integer-overflow %s -o %t && %t 2>&1 | FileCheck %s --check-prefix=SUB_I128
+
+#include <stdint.h>
+#include <stdio.h>
+
+int main() {
+  // These promote to 'int'.
+  (void)(uint8_t(0) - uint8_t(0x7f));
+  (void)(uint16_t(0) - uint16_t(0x7fff));
+
+#ifdef SUB_I32
+  (void)(uint32_t(1) - uint32_t(2));
+  // CHECK-SUB_I32: usub-overflow.cpp:[[@LINE-1]]:22: runtime error: unsigned integer overflow: 1 - 2 cannot be represented in type 'unsigned int'
+#endif
+
+#ifdef SUB_I64
+  (void)(uint64_t(8000000000000000000ll) - uint64_t(9000000000000000000ll));
+  // CHECK-SUB_I64: 8000000000000000000 - 9000000000000000000 cannot be represented in type 'unsigned {{long( long)?}}'
+#endif
+
+#ifdef SUB_I128
+# ifdef __SIZEOF_INT128__
+  (void)((__uint128_t(1) << 126) - (__uint128_t(1) << 127));
+# else
+  puts("__int128 not supported");
+# endif
+  // CHECK-SUB_I128: {{0x40000000000000000000000000000000 - 0x80000000000000000000000000000000 cannot be represented in type 'unsigned __int128'|__int128 not supported}}
+#endif
+}
diff --git a/lib/ubsan/lit_tests/Misc/missing_return.cpp b/lib/ubsan/lit_tests/Misc/missing_return.cpp
index 73febea..9997b83 100644
--- a/lib/ubsan/lit_tests/Misc/missing_return.cpp
+++ b/lib/ubsan/lit_tests/Misc/missing_return.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang -fcatch-undefined-behavior %s -O3 -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=return %s -O3 -o %t && %t 2>&1 | FileCheck %s
 
-// CHECK: missing_return.cpp:4:5: fatal error: execution reached the end of a value-returning function without returning a value
+// CHECK: missing_return.cpp:4:5: runtime error: execution reached the end of a value-returning function without returning a value
 int f() {
 }
 
diff --git a/lib/ubsan/lit_tests/Misc/unreachable.cpp b/lib/ubsan/lit_tests/Misc/unreachable.cpp
index ded1de6..5ca4e5f 100644
--- a/lib/ubsan/lit_tests/Misc/unreachable.cpp
+++ b/lib/ubsan/lit_tests/Misc/unreachable.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang -fcatch-undefined-behavior %s -O3 -o %t && %t 2>&1 | FileCheck %s
+// RUN: %clang -fsanitize=unreachable %s -O3 -o %t && %t 2>&1 | FileCheck %s
 
 int main(int, char **argv) {
-  // CHECK: unreachable.cpp:5:3: fatal error: execution reached a __builtin_unreachable() call
+  // CHECK: unreachable.cpp:5:3: runtime error: execution reached a __builtin_unreachable() call
   __builtin_unreachable();
 }
diff --git a/lib/ubsan/lit_tests/Misc/vla.c b/lib/ubsan/lit_tests/Misc/vla.c
index 4137e80..2fa88ad 100644
--- a/lib/ubsan/lit_tests/Misc/vla.c
+++ b/lib/ubsan/lit_tests/Misc/vla.c
@@ -1,11 +1,11 @@
-// RUN: %clang -fcatch-undefined-behavior %s -O3 -o %t
+// RUN: %clang -fsanitize=vla-bound %s -O3 -o %t
 // RUN: %t 2>&1 | FileCheck %s --check-prefix=CHECK-MINUS-ONE
 // RUN: %t a 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO
 // RUN: %t a b
 
 int main(int argc, char **argv) {
-  // CHECK-MINUS-ONE: vla.c:9:11: fatal error: variable length array bound evaluates to non-positive value -1
-  // CHECK-ZERO: vla.c:9:11: fatal error: variable length array bound evaluates to non-positive value 0
+  // CHECK-MINUS-ONE: vla.c:9:11: runtime error: variable length array bound evaluates to non-positive value -1
+  // CHECK-ZERO: vla.c:9:11: runtime error: variable length array bound evaluates to non-positive value 0
   int arr[argc - 2];
   return 0;
 }
diff --git a/lib/ubsan/lit_tests/TypeCheck/misaligned.cpp b/lib/ubsan/lit_tests/TypeCheck/misaligned.cpp
index acc73e0..af52bd1 100644
--- a/lib/ubsan/lit_tests/TypeCheck/misaligned.cpp
+++ b/lib/ubsan/lit_tests/TypeCheck/misaligned.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang -fcatch-undefined-behavior %s -O3 -o %t
+// RUN: %clang -fsanitize=alignment %s -O3 -o %t
 // RUN: %t l0 && %t s0 && %t r0 && %t m0 && %t f0
 // RUN: %t l1 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD
 // RUN: %t s1 2>&1 | FileCheck %s --check-prefix=CHECK-STORE
@@ -22,21 +22,21 @@
 
   switch (argv[1][0]) {
   case 'l':
-    // CHECK-LOAD: misaligned.cpp:26:12: fatal error: load of misaligned address 0x{{[0-9a-f]*}} for type 'int', which requires 4 byte alignment
+    // CHECK-LOAD: misaligned.cpp:26:12: runtime error: load of misaligned address 0x{{[0-9a-f]*}} for type 'int', which requires 4 byte alignment
     return *p;
   case 's':
-    // CHECK-STORE: misaligned.cpp:29:5: fatal error: store to misaligned address 0x{{[0-9a-f]*}} for type 'int', which requires 4 byte alignment
+    // CHECK-STORE: misaligned.cpp:29:5: runtime error: store to misaligned address 0x{{[0-9a-f]*}} for type 'int', which requires 4 byte alignment
     *p = 1;
     break;
   case 'r':
-    // CHECK-REFERENCE: misaligned.cpp:33:15: fatal error: reference binding to misaligned address 0x{{[0-9a-f]*}} for type 'int', which requires 4 byte alignment
+    // CHECK-REFERENCE: misaligned.cpp:33:15: runtime error: reference binding to misaligned address 0x{{[0-9a-f]*}} for type 'int', which requires 4 byte alignment
     {int &r = *p;}
     break;
   case 'm':
-    // CHECK-MEMBER: misaligned.cpp:37:15: fatal error: member access within misaligned address 0x{{[0-9a-f]*}} for type 'S', which requires 4 byte alignment
+    // CHECK-MEMBER: misaligned.cpp:37:15: runtime error: member access within misaligned address 0x{{[0-9a-f]*}} for type 'S', which requires 4 byte alignment
     return s->k;
   case 'f':
-    // CHECK-MEMFUN: misaligned.cpp:40:12: fatal error: member call on misaligned address 0x{{[0-9a-f]*}} for type 'S', which requires 4 byte alignment
+    // CHECK-MEMFUN: misaligned.cpp:40:12: runtime error: member call on misaligned address 0x{{[0-9a-f]*}} for type 'S', which requires 4 byte alignment
     return s->f();
   }
 }
diff --git a/lib/ubsan/lit_tests/TypeCheck/null.cpp b/lib/ubsan/lit_tests/TypeCheck/null.cpp
index 8e17388..f72af28 100644
--- a/lib/ubsan/lit_tests/TypeCheck/null.cpp
+++ b/lib/ubsan/lit_tests/TypeCheck/null.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang -fcatch-undefined-behavior %s -O3 -o %t
+// RUN: %clang -fsanitize=null %s -O3 -o %t
 // RUN: %t l 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD
 // RUN: %t s 2>&1 | FileCheck %s --check-prefix=CHECK-STORE
 // RUN: %t r 2>&1 | FileCheck %s --check-prefix=CHECK-REFERENCE
@@ -18,21 +18,21 @@
 
   switch (argv[1][0]) {
   case 'l':
-    // CHECK-LOAD: null.cpp:22:12: fatal error: load of null pointer of type 'int'
+    // CHECK-LOAD: null.cpp:22:12: runtime error: load of null pointer of type 'int'
     return *p;
   case 's':
-    // CHECK-STORE: null.cpp:25:5: fatal error: store to null pointer of type 'int'
+    // CHECK-STORE: null.cpp:25:5: runtime error: store to null pointer of type 'int'
     *p = 1;
     break;
   case 'r':
-    // CHECK-REFERENCE: null.cpp:29:15: fatal error: reference binding to null pointer of type 'int'
+    // CHECK-REFERENCE: null.cpp:29:15: runtime error: reference binding to null pointer of type 'int'
     {int &r = *p;}
     break;
   case 'm':
-    // CHECK-MEMBER: null.cpp:33:15: fatal error: member access within null pointer of type 'S'
+    // CHECK-MEMBER: null.cpp:33:15: runtime error: member access within null pointer of type 'S'
     return s->k;
   case 'f':
-    // CHECK-MEMFUN: null.cpp:36:12: fatal error: member call on null pointer of type 'S'
+    // CHECK-MEMFUN: null.cpp:36:12: runtime error: member call on null pointer of type 'S'
     return s->f();
   }
 }
diff --git a/lib/ubsan/lit_tests/TypeCheck/vptr.cpp b/lib/ubsan/lit_tests/TypeCheck/vptr.cpp
index c8e2820..6533568 100644
--- a/lib/ubsan/lit_tests/TypeCheck/vptr.cpp
+++ b/lib/ubsan/lit_tests/TypeCheck/vptr.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang -ccc-cxx -fcatch-undefined-behavior %s -O3 -o %t
+// RUN: %clang -ccc-cxx -fsanitize=vptr %s -O3 -o %t
 // RUN: %t rT && %t mT && %t fT
 // RUN: %t rU && %t mU && %t fU
 // RUN: %t rS 2>&1 | FileCheck %s --check-prefix=CHECK-REFERENCE
@@ -8,6 +8,9 @@
 // RUN: %t mV 2>&1 | FileCheck %s --check-prefix=CHECK-MEMBER
 // RUN: %t fV 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN
 
+// FIXME: This test produces linker errors on Darwin.
+// XFAIL: darwin
+
 struct S {
   S() : a(0) {}
   ~S() {}
@@ -61,14 +64,14 @@
 
   switch (argv[1][0]) {
   case 'r':
-    // CHECK-REFERENCE: vptr.cpp:65:13: fatal error: reference binding to address 0x{{[0-9a-f]*}} which does not point to an object of type 'T'
+    // CHECK-REFERENCE: vptr.cpp:[[@LINE+1]]:13: runtime error: reference binding to address 0x{{[0-9a-f]*}} which does not point to an object of type 'T'
     {T &r = *p;}
     break;
   case 'm':
-    // CHECK-MEMBER: vptr.cpp:69:15: fatal error: member access within address 0x{{[0-9a-f]*}} which does not point to an object of type 'T'
+    // CHECK-MEMBER: vptr.cpp:[[@LINE+1]]:15: runtime error: member access within address 0x{{[0-9a-f]*}} which does not point to an object of type 'T'
     return p->b;
   case 'f':
-    // CHECK-MEMFUN: vptr.cpp:72:12: fatal error: member call on address 0x{{[0-9a-f]*}} which does not point to an object of type 'T'
+    // CHECK-MEMFUN: vptr.cpp:[[@LINE+1]]:12: runtime error: member call on address 0x{{[0-9a-f]*}} which does not point to an object of type 'T'
     return p->g();
   }
 }
diff --git a/lib/ubsan/lit_tests/lit.cfg b/lib/ubsan/lit_tests/lit.cfg
index 50a8cbd..9fd3a1a 100644
--- a/lib/ubsan/lit_tests/lit.cfg
+++ b/lib/ubsan/lit_tests/lit.cfg
@@ -59,6 +59,7 @@
 # Default test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp']
 
-# UndefinedBehaviorSanitizer tests are currently supported on Linux only.
-if config.host_os not in ['Linux']:
+# UndefinedBehaviorSanitizer tests are currently supported on
+# Linux and Darwin only.
+if config.host_os not in ['Linux', 'Darwin']:
   config.unsupported = True
diff --git a/lib/ubsan/ubsan_diag.cc b/lib/ubsan/ubsan_diag.cc
index 05d8132..8a1af4b 100644
--- a/lib/ubsan/ubsan_diag.cc
+++ b/lib/ubsan/ubsan_diag.cc
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "ubsan_diag.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
 #include <stdio.h>
-#include <unistd.h>
-#include <limits.h>
 
 using namespace __ubsan;
 
@@ -37,7 +37,7 @@
 /// Hexadecimal printing for numbers too large for fprintf to handle directly.
 static void PrintHex(UIntMax Val) {
 #if HAVE_INT128_T
-  fprintf(stderr, "0x%08x%08x%08x%08x",
+  Printf("0x%08x%08x%08x%08x",
           (unsigned int)(Val >> 96),
           (unsigned int)(Val >> 64),
           (unsigned int)(Val >> 32),
@@ -48,55 +48,64 @@
 }
 
 Diag::~Diag() {
-  // FIXME: This is non-portable.
-  bool UseAnsiColor = isatty(STDERR_FILENO);
+  bool UseAnsiColor = PrintsToTty();
   if (UseAnsiColor)
-    fprintf(stderr, "\033[1m");
+    RawWrite("\033[1m");
   if (Loc.isInvalid())
-    fprintf(stderr, "<unknown>:");
+    RawWrite("<unknown>:");
   else {
-    fprintf(stderr, "%s:%d:", Loc.getFilename(), Loc.getLine());
+    Printf("%s:%d:", Loc.getFilename(), Loc.getLine());
     if (Loc.getColumn())
-      fprintf(stderr, "%d:", Loc.getColumn());
+      Printf("%d:", Loc.getColumn());
   }
   if (UseAnsiColor)
-    fprintf(stderr, "\033[31m");
-  fprintf(stderr, " fatal error: ");
+    RawWrite("\033[31m");
+  RawWrite(" runtime error: ");
   if (UseAnsiColor)
-    fprintf(stderr, "\033[0;1m");
+    RawWrite("\033[0;1m");
   for (const char *Msg = Message; *Msg; ++Msg) {
-    if (*Msg != '%')
-      fputc((unsigned char)*Msg, stderr);
-    else {
+    if (*Msg != '%') {
+      char Buffer[64];
+      unsigned I;
+      for (I = 0; Msg[I] && Msg[I] != '%' && I != 63; ++I)
+        Buffer[I] = Msg[I];
+      Buffer[I] = '\0';
+      RawWrite(Buffer);
+      Msg += I - 1;
+    } else {
       const Arg &A = Args[*++Msg - '0'];
       switch (A.Kind) {
       case AK_String:
-        fprintf(stderr, "%s", A.String);
+        Printf("%s", A.String);
         break;
       case AK_SInt:
         // 'long long' is guaranteed to be at least 64 bits wide.
         if (A.SInt >= INT64_MIN && A.SInt <= INT64_MAX)
-          fprintf(stderr, "%lld", (long long)A.SInt);
+          Printf("%lld", (long long)A.SInt);
         else
           PrintHex(A.SInt);
         break;
       case AK_UInt:
         if (A.UInt <= UINT64_MAX)
-          fprintf(stderr, "%llu", (unsigned long long)A.UInt);
+          Printf("%llu", (unsigned long long)A.UInt);
         else
           PrintHex(A.UInt);
         break;
-      case AK_Float:
-        fprintf(stderr, "%Lg", (long double)A.Float);
+      case AK_Float: {
+        // FIXME: Support floating-point formatting in sanitizer_common's
+        //        printf, and stop using snprintf here.
+        char Buffer[32];
+        snprintf(Buffer, sizeof(Buffer), "%Lg", (long double)A.Float);
+        Printf("%s", Buffer);
         break;
+      }
       case AK_Pointer:
-        fprintf(stderr, "0x%zx", (uptr)A.Pointer);
+        Printf("0x%zx", (uptr)A.Pointer);
         break;
       }
     }
   }
-  fputc('\n', stderr);
+  RawWrite("\n");
   if (UseAnsiColor)
-    fprintf(stderr, "\033[0m");
-  fflush(stderr);
+    Printf("\033[0m");
 }
diff --git a/lib/ubsan/ubsan_handlers.cc b/lib/ubsan/ubsan_handlers.cc
index ae0f1f6..47f06e8 100644
--- a/lib/ubsan/ubsan_handlers.cc
+++ b/lib/ubsan/ubsan_handlers.cc
@@ -19,18 +19,6 @@
 using namespace __sanitizer;
 using namespace __ubsan;
 
-NORETURN void __sanitizer::Die() {
-  __builtin_trap();
-}
-
-NORETURN void __sanitizer::CheckFailed(const char *File, int Line,
-                                       const char *Cond, u64 V1, u64 V2) {
-  Diag(SourceLocation(File, Line, 0),
-       "CHECK failed: %0 (with values %1 and %2)")
-    << Cond << V1 << V2;
-  Die();
-}
-
 namespace __ubsan {
   const char *TypeCheckKinds[] = {
     "load of", "store to", "reference binding to", "member access within",
@@ -52,33 +40,55 @@
     Diag(Data->Loc, "%0 address %1 with insufficient space "
                     "for an object of type %2")
       << TypeCheckKinds[Data->TypeCheckKind] << (void*)Pointer << Data->Type;
+}
+void __ubsan::__ubsan_handle_type_mismatch_abort(TypeMismatchData *Data,
+                                                  ValueHandle Pointer) {
+  __ubsan_handle_type_mismatch(Data, Pointer);
   Die();
 }
 
-/// \brief Common diagnostic emission for various forms of signed overflow.
-template<typename T> static void HandleSignedOverflow(OverflowData *Data,
+/// \brief Common diagnostic emission for various forms of integer overflow.
+template<typename T> static void HandleIntegerOverflow(OverflowData *Data,
                                                       ValueHandle LHS,
                                                       const char *Operator,
                                                       T RHS) {
-  Diag(Data->Loc, "signed integer overflow: "
-                  "%0 %1 %2 cannot be represented in type %3")
+  Diag(Data->Loc, "%0 integer overflow: "
+                  "%1 %2 %3 cannot be represented in type %4")
+    << (Data->Type.isSignedIntegerTy() ? "signed" : "unsigned")
     << Value(Data->Type, LHS) << Operator << RHS << Data->Type;
-  Die();
 }
 
 void __ubsan::__ubsan_handle_add_overflow(OverflowData *Data,
                                           ValueHandle LHS, ValueHandle RHS) {
-  HandleSignedOverflow(Data, LHS, "+", Value(Data->Type, RHS));
+  HandleIntegerOverflow(Data, LHS, "+", Value(Data->Type, RHS));
+}
+void __ubsan::__ubsan_handle_add_overflow_abort(OverflowData *Data,
+                                                 ValueHandle LHS,
+                                                 ValueHandle RHS) {
+  __ubsan_handle_add_overflow(Data, LHS, RHS);
+  Die();
 }
 
 void __ubsan::__ubsan_handle_sub_overflow(OverflowData *Data,
                                           ValueHandle LHS, ValueHandle RHS) {
-  HandleSignedOverflow(Data, LHS, "-", Value(Data->Type, RHS));
+  HandleIntegerOverflow(Data, LHS, "-", Value(Data->Type, RHS));
+}
+void __ubsan::__ubsan_handle_sub_overflow_abort(OverflowData *Data,
+                                                 ValueHandle LHS,
+                                                 ValueHandle RHS) {
+  __ubsan_handle_sub_overflow(Data, LHS, RHS);
+  Die();
 }
 
 void __ubsan::__ubsan_handle_mul_overflow(OverflowData *Data,
                                           ValueHandle LHS, ValueHandle RHS) {
-  HandleSignedOverflow(Data, LHS, "*", Value(Data->Type, RHS));
+  HandleIntegerOverflow(Data, LHS, "*", Value(Data->Type, RHS));
+}
+void __ubsan::__ubsan_handle_mul_overflow_abort(OverflowData *Data,
+                                                 ValueHandle LHS,
+                                                 ValueHandle RHS) {
+  __ubsan_handle_mul_overflow(Data, LHS, RHS);
+  Die();
 }
 
 void __ubsan::__ubsan_handle_negate_overflow(OverflowData *Data,
@@ -86,6 +96,10 @@
   Diag(Data->Loc, "negation of %0 cannot be represented in type %1; "
                   "cast to an unsigned type to negate this value to itself")
     << Value(Data->Type, OldVal) << Data->Type;
+}
+void __ubsan::__ubsan_handle_negate_overflow_abort(OverflowData *Data,
+                                                    ValueHandle OldVal) {
+  __ubsan_handle_negate_overflow(Data, OldVal);
   Die();
 }
 
@@ -98,6 +112,11 @@
       << LHSVal << Data->Type;
   else
     Diag(Data->Loc, "division by zero");
+}
+void __ubsan::__ubsan_handle_divrem_overflow_abort(OverflowData *Data,
+                                                    ValueHandle LHS,
+                                                    ValueHandle RHS) {
+  __ubsan_handle_divrem_overflow(Data, LHS, RHS);
   Die();
 }
 
@@ -116,6 +135,12 @@
   else
     Diag(Data->Loc, "left shift of %0 by %1 places cannot be represented "
                     "in type %2") << LHSVal << RHSVal << Data->LHSType;
+}
+void __ubsan::__ubsan_handle_shift_out_of_bounds_abort(
+                                                     ShiftOutOfBoundsData *Data,
+                                                     ValueHandle LHS,
+                                                     ValueHandle RHS) {
+  __ubsan_handle_shift_out_of_bounds(Data, LHS, RHS);
   Die();
 }
 
@@ -135,6 +160,10 @@
   Diag(Data->Loc, "variable length array bound evaluates to "
                   "non-positive value %0")
     << Value(Data->Type, Bound);
+}
+void __ubsan::__ubsan_handle_vla_bound_not_positive_abort(VLABoundData *Data,
+                                                           ValueHandle Bound) {
+  __ubsan_handle_vla_bound_not_positive(Data, Bound);
   Die();
 }
 
@@ -143,5 +172,10 @@
   Diag(SourceLocation(), "value %0 is outside the range of representable "
                          "values of type %2")
     << Value(Data->FromType, From) << Data->FromType << Data->ToType;
+}
+void __ubsan::__ubsan_handle_float_cast_overflow_abort(
+                                                    FloatCastOverflowData *Data,
+                                                    ValueHandle From) {
+  __ubsan_handle_float_cast_overflow(Data, From);
   Die();
 }
diff --git a/lib/ubsan/ubsan_handlers.h b/lib/ubsan/ubsan_handlers.h
index dc61fd3..5709fcf 100644
--- a/lib/ubsan/ubsan_handlers.h
+++ b/lib/ubsan/ubsan_handlers.h
@@ -24,36 +24,35 @@
   unsigned char TypeCheckKind;
 };
 
+#define RECOVERABLE(checkname, ...) \
+  extern "C" void __ubsan_handle_ ## checkname( __VA_ARGS__ ); \
+  extern "C" void __ubsan_handle_ ## checkname ## _abort( __VA_ARGS__ );
+
 /// \brief Handle a runtime type check failure, caused by either a misaligned
 /// pointer, a null pointer, or a pointer to insufficient storage for the
 /// type.
-extern "C" void __ubsan_handle_type_mismatch(TypeMismatchData *Data,
-                                             ValueHandle Pointer);
+RECOVERABLE(type_mismatch, TypeMismatchData *Data, ValueHandle Pointer)
 
 struct OverflowData {
   SourceLocation Loc;
   const TypeDescriptor &Type;
 };
 
-/// \brief Handle a signed integer addition overflow.
-extern "C" void __ubsan_handle_add_overflow(OverflowData *Data,
-                                            ValueHandle LHS,
-                                            ValueHandle RHS);
-/// \brief Handle a signed integer subtraction overflow.
-extern "C" void __ubsan_handle_sub_overflow(OverflowData *Data,
-                                            ValueHandle LHS,
-                                            ValueHandle RHS);
-/// \brief Handle a signed integer multiplication overflow.
-extern "C" void __ubsan_handle_mul_overflow(OverflowData *Data,
-                                            ValueHandle LHS,
-                                            ValueHandle RHS);
+/// \brief Handle an integer addition overflow.
+RECOVERABLE(add_overflow, OverflowData *Data, ValueHandle LHS, ValueHandle RHS)
+
+/// \brief Handle an integer subtraction overflow.
+RECOVERABLE(sub_overflow, OverflowData *Data, ValueHandle LHS, ValueHandle RHS)
+
+/// \brief Handle an integer multiplication overflow.
+RECOVERABLE(mul_overflow, OverflowData *Data, ValueHandle LHS, ValueHandle RHS)
+
 /// \brief Handle a signed integer overflow for a unary negate operator.
-extern "C" void __ubsan_handle_negate_overflow(OverflowData *Data,
-                                               ValueHandle OldVal);
+RECOVERABLE(negate_overflow, OverflowData *Data, ValueHandle OldVal)
+
 /// \brief Handle an INT_MIN/-1 overflow or division by zero.
-extern "C" void __ubsan_handle_divrem_overflow(OverflowData *Data,
-                                               ValueHandle LHS,
-                                               ValueHandle RHS);
+RECOVERABLE(divrem_overflow, OverflowData *Data,
+            ValueHandle LHS, ValueHandle RHS)
 
 struct ShiftOutOfBoundsData {
   SourceLocation Loc;
@@ -63,9 +62,8 @@
 
 /// \brief Handle a shift where the RHS is out of bounds or a left shift where
 /// the LHS is negative or overflows.
-extern "C" void __ubsan_handle_shift_out_of_bounds(ShiftOutOfBoundsData *Data,
-                                                   ValueHandle LHS,
-                                                   ValueHandle RHS);
+RECOVERABLE(shift_out_of_bounds, ShiftOutOfBoundsData *Data,
+            ValueHandle LHS, ValueHandle RHS)
 
 struct UnreachableData {
   SourceLocation Loc;
@@ -82,8 +80,7 @@
 };
 
 /// \brief Handle a VLA with a non-positive bound.
-extern "C" void __ubsan_handle_vla_bound_not_positive(VLABoundData *Data,
-                                                      ValueHandle Bound);
+RECOVERABLE(vla_bound_not_positive, VLABoundData *Data, ValueHandle Bound)
 
 struct FloatCastOverflowData {
   // FIXME: SourceLocation Loc;
@@ -92,8 +89,7 @@
 };
 
 /// \brief Handle overflow in a conversion to or from a floating-point type.
-extern "C" void __ubsan_handle_float_cast_overflow(FloatCastOverflowData *Data,
-                                                   ValueHandle From);
+RECOVERABLE(float_cast_overflow, FloatCastOverflowData *Data, ValueHandle From)
 
 }
 
diff --git a/lib/ubsan/ubsan_handlers_cxx.cc b/lib/ubsan/ubsan_handlers_cxx.cc
index e0d3442..593fe13 100644
--- a/lib/ubsan/ubsan_handlers_cxx.cc
+++ b/lib/ubsan/ubsan_handlers_cxx.cc
@@ -26,8 +26,9 @@
   extern const char *TypeCheckKinds[];
 }
 
-void __ubsan::__ubsan_handle_dynamic_type_cache_miss(
-  DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash) {
+static void HandleDynamicTypeCacheMiss(
+  DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash,
+  bool abort) {
   if (checkDynamicType((void*)Pointer, Data->TypeInfo, Hash))
     // Just a cache miss. The type matches after all.
     return;
@@ -36,7 +37,7 @@
     << TypeCheckKinds[Data->TypeCheckKind] << (void*)Pointer << Data->Type;
   // FIXME: If possible, say what type it actually points to. Produce a note
   //        pointing out the vptr:
-  // lib/VMCore/Instructions.cpp:2020:10: fatal error: member call on address
+  // lib/VMCore/Instructions.cpp:2020:10: runtime error: member call on address
   //       0xb7a4440 which does not point to an object of type
   //       'llvm::OverflowingBinaryOperator'
   //   return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
@@ -45,5 +46,15 @@
   //   00 00 00 00  e0 f7 c5 09 00 00 00 00  20 00 00 00
   //                ^~~~~~~~~~~
   //                vptr for 'llvm::BinaryOperator'
-  Die();
+  if (abort)
+    Die();
+}
+
+void __ubsan::__ubsan_handle_dynamic_type_cache_miss(
+  DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash) {
+  HandleDynamicTypeCacheMiss(Data, Pointer, Hash, false);
+}
+void __ubsan::__ubsan_handle_dynamic_type_cache_miss_abort(
+  DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash) {
+  HandleDynamicTypeCacheMiss(Data, Pointer, Hash, true);
 }
diff --git a/lib/ubsan/ubsan_handlers_cxx.h b/lib/ubsan/ubsan_handlers_cxx.h
index 8192e65..0fbcafb 100644
--- a/lib/ubsan/ubsan_handlers_cxx.h
+++ b/lib/ubsan/ubsan_handlers_cxx.h
@@ -30,6 +30,8 @@
 /// cache; this does not necessarily imply the existence of a bug.
 extern "C" void __ubsan_handle_dynamic_type_cache_miss(
   DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash);
+extern "C" void __ubsan_handle_dynamic_type_cache_miss_abort(
+  DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash);
 
 }
 
diff --git a/lib/ubsan/ubsan_value.h b/lib/ubsan/ubsan_value.h
index a855791..21313fc 100644
--- a/lib/ubsan/ubsan_value.h
+++ b/lib/ubsan/ubsan_value.h
@@ -14,16 +14,16 @@
 #ifndef UBSAN_VALUE_H
 #define UBSAN_VALUE_H
 
-// For now, only support linux. Other platforms should be easy to add, and
-// probably work as-is.
-#if !defined(__linux__)
+// For now, only support linux and darwin. Other platforms should be easy to
+// add, and probably work as-is.
+#if !defined(__linux__) && !defined(__APPLE__)
 #error "UBSan not supported for this platform!"
 #endif
 
 #include "sanitizer_common/sanitizer_common.h"
 
 // FIXME: Move this out to a config header.
-#if defined(__clang__) || __SIZEOF_INT128__
+#if __SIZEOF_INT128__
 typedef __int128 s128;
 typedef unsigned __int128 u128;
 #define HAVE_INT128_T 1
diff --git a/make/AppleBI.mk b/make/AppleBI.mk
index 96f8222..b5e702b 100644
--- a/make/AppleBI.mk
+++ b/make/AppleBI.mk
@@ -64,8 +64,7 @@
 $(SYMROOT)/libcompiler_rt.dylib: $(foreach arch,$(filter-out armv4t,$(RC_ARCHS)), \
                                         $(OBJROOT)/libcompiler_rt-$(arch).dylib)
 	$(call GetCNAVar,LIPO,Platform.darwin_bni,Release,) -create $^ -o  $@
-
-
+	$(call GetCNAVar,DSYMUTIL,Platform.darwin_bni,Release,) $@
 
 
 # Copy results to DSTROOT.
diff --git a/make/config.mk b/make/config.mk
index 12d8bc2..6398d05 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -21,6 +21,7 @@
 DATE := date
 LIPO := lipo
 CP := cp
+DSYMUTIL := dsymutil
 
 VERBOSE := 0
 DEBUGMAKE :=
diff --git a/make/lib_info.mk b/make/lib_info.mk
index 5e66816..31850f7 100644
--- a/make/lib_info.mk
+++ b/make/lib_info.mk
@@ -56,4 +56,4 @@
                     CC CFLAGS LDFLAGS FUNCTIONS OPTIMIZED \
                     RANLIB RANLIBFLAGS \
                     VISIBILITY_HIDDEN KERNEL_USE \
-                    SHARED_LIBRARY SHARED_LIBRARY_SUFFIX STRIP LIPO
+                    SHARED_LIBRARY SHARED_LIBRARY_SUFFIX STRIP LIPO DSYMUTIL
diff --git a/make/options.mk b/make/options.mk
index ec4a55d..67197de 100644
--- a/make/options.mk
+++ b/make/options.mk
@@ -43,5 +43,6 @@
 
 STRIP := strip
 LIPO := lipo
+DSYMUTIL := dsymutil
 
 SHARED_LIBRARY_SUFFIX := so
diff --git a/make/platform/clang_darwin.mk b/make/platform/clang_darwin.mk
index 5bc0e10..fe84a05 100644
--- a/make/platform/clang_darwin.mk
+++ b/make/platform/clang_darwin.mk
@@ -76,6 +76,9 @@
 Configs += asan_osx_dynamic
 UniversalArchs.asan_osx_dynamic := $(call CheckArches,i386 x86_64,asan_osx_dynamic)
 
+Configs += ubsan_osx
+UniversalArchs.ubsan_osx := $(call CheckArches,i386 x86_64,ubsan_osx)
+
 # Darwin 10.6 has a bug in cctools that makes it unable to use ranlib on our ARM
 # object files. If we are on that platform, strip out all ARM archs. We still
 # build the libraries themselves so that Clang can find them where it expects
@@ -131,6 +134,8 @@
 	$(CFLAGS) -mmacosx-version-min=10.5 -fno-builtin \
 	-DMAC_INTERPOSE_FUNCTIONS=1
 
+CFLAGS.ubsan_osx	:= $(CFLAGS) -mmacosx-version-min=10.5 -fno-builtin
+
 CFLAGS.ios.i386		:= $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
 CFLAGS.ios.x86_64	:= $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
 CFLAGS.ios.armv7	:= $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
@@ -183,6 +188,8 @@
                               $(SanitizerCommonFunctions) \
 	                      $(AsanDynamicFunctions)
 
+FUNCTIONS.ubsan_osx := $(UbsanFunctions) $(SanitizerCommonFunctions)
+
 CCKEXT_COMMON_FUNCTIONS := \
 	absvdi2 \
 	absvsi2 \
diff --git a/make/platform/clang_linux.mk b/make/platform/clang_linux.mk
index 4366143..1f73145 100644
--- a/make/platform/clang_linux.mk
+++ b/make/platform/clang_linux.mk
@@ -51,19 +51,21 @@
 
 # Build runtime libraries for i386.
 ifeq ($(call contains,$(SupportedArches),i386),true)
-Configs += full-i386 profile-i386 asan-i386
+Configs += full-i386 profile-i386 asan-i386 ubsan-i386
 Arch.full-i386 := i386
 Arch.profile-i386 := i386
 Arch.asan-i386 := i386
+Arch.ubsan-i386 := i386
 endif
 
 # Build runtime libraries for x86_64.
 ifeq ($(call contains,$(SupportedArches),x86_64),true)
-Configs += full-x86_64 profile-x86_64 asan-x86_64 tsan-x86_64
+Configs += full-x86_64 profile-x86_64 asan-x86_64 tsan-x86_64 ubsan-x86_64
 Arch.full-x86_64 := x86_64
 Arch.profile-x86_64 := x86_64
 Arch.asan-x86_64 := x86_64
 Arch.tsan-x86_64 := x86_64
+Arch.ubsan-x86_64 := x86_64
 endif
 
 ifneq ($(LLVM_ANDROID_TOOLCHAIN_DIR),)
@@ -85,6 +87,8 @@
 CFLAGS.asan-i386 := $(CFLAGS) -m32 -fPIE -fno-builtin
 CFLAGS.asan-x86_64 := $(CFLAGS) -m64 -fPIE -fno-builtin
 CFLAGS.tsan-x86_64 := $(CFLAGS) -m64 -fPIE -fno-builtin
+CFLAGS.ubsan-i386 := $(CFLAGS) -m32 -fPIE -fno-builtin
+CFLAGS.ubsan-x86_64 := $(CFLAGS) -m64 -fPIE -fno-builtin
 
 SHARED_LIBRARY.asan-arm-android := 1
 ANDROID_COMMON_FLAGS := -target arm-linux-androideabi \
@@ -113,7 +117,9 @@
 FUNCTIONS.asan-arm-android := $(AsanFunctions) $(InterceptionFunctions) \
                                           $(SanitizerCommonFunctions)
 FUNCTIONS.tsan-x86_64 := $(TsanFunctions) $(InterceptionFunctions) \
-                                          $(SanitizerCommonFunctions) 
+                                          $(SanitizerCommonFunctions)
+FUNCTIONS.ubsan-i386 := $(UbsanFunctions) $(SanitizerCommonFunctions)
+FUNCTIONS.ubsan-x86_64 := $(UbsanFunctions) $(SanitizerCommonFunctions)
 
 # Always use optimized variants.
 OPTIMIZED := 1
diff --git a/make/platform/darwin_bni.mk b/make/platform/darwin_bni.mk
index 000181c..d12cfdf 100644
--- a/make/platform/darwin_bni.mk
+++ b/make/platform/darwin_bni.mk
@@ -14,6 +14,7 @@
 	RANLIB := $(shell xcrun -sdk $(SDKROOT) -find ranlib) 
 	STRIP := $(shell xcrun -sdk $(SDKROOT) -find strip) 
 	LIPO := $(shell xcrun -sdk $(SDKROOT) -find lipo)
+	DSYMUTIL := $(shell xcrun -sdk $(SDKROOT) -find dsymutil)
 endif
 
 ifneq ($(IPHONEOS_DEPLOYMENT_TARGET),)
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
deleted file mode 100644
index cb75012..0000000
--- a/utils/CMakeLists.txt
+++ /dev/null
@@ -1 +0,0 @@
-add_subdirectory(llvm-symbolizer)
diff --git a/utils/llvm-symbolizer/CMakeLists.txt b/utils/llvm-symbolizer/CMakeLists.txt
deleted file mode 100644
index a01ed53..0000000
--- a/utils/llvm-symbolizer/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# FIXME: As we plan to execute llvm-symbolizer binary from Sanitizer
-# runtime, it has to be compiled for all supported targets (x86_64, i386 etc).
-# This means that we need LLVM libraries to be compiled for these
-# targets as well. Currently, there is no support for such a build strategy.
-
-set(LLVM_LINK_COMPONENTS
-  DebugInfo
-  Object
-  )
-
-add_llvm_tool(llvm-symbolizer
-  llvm-symbolizer.cpp
-  )
-
-# FIXME: Set sane output directory for llvm-symbolizer
-set_target_properties(llvm-symbolizer PROPERTIES
-  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/utils/llvm-symbolizer/llvm-symbolizer.cpp b/utils/llvm-symbolizer/llvm-symbolizer.cpp
deleted file mode 100644
index 79bdced..0000000
--- a/utils/llvm-symbolizer/llvm-symbolizer.cpp
+++ /dev/null
@@ -1,379 +0,0 @@
-//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This utility works much like "addr2line". It is able of transforming
-// tuples (module name, module offset) to code locations (function name,
-// file, line number, column number). It is targeted for compiler-rt tools
-// (especially AddressSanitizer and ThreadSanitizer) that can use it
-// to symbolize stack traces in their error reports.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Signals.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <cstdio>
-#include <cstring>
-#include <map>
-#include <string>
-
-using namespace llvm;
-using namespace object;
-
-static cl::opt<bool>
-UseSymbolTable("use-symbol-table", cl::init(true),
-               cl::desc("Prefer names in symbol table to names "
-                        "in debug info"));
-
-static cl::opt<bool>
-PrintFunctions("functions", cl::init(true),
-               cl::desc("Print function names as well as line "
-                        "information for a given address"));
-
-static cl::opt<bool>
-PrintInlining("inlining", cl::init(true),
-              cl::desc("Print all inlined frames for a given address"));
-
-static cl::opt<bool>
-Demangle("demangle", cl::init(true),
-         cl::desc("Demangle function names"));
-
-static StringRef ToolInvocationPath;
-
-static bool error(error_code ec) {
-  if (!ec) return false;
-  errs() << ToolInvocationPath << ": error reading file: "
-         << ec.message() << ".\n";
-  return true;
-}
-
-static uint32_t getDILineInfoSpecifierFlags() {
-  uint32_t Flags = llvm::DILineInfoSpecifier::FileLineInfo |
-                   llvm::DILineInfoSpecifier::AbsoluteFilePath;
-  if (PrintFunctions)
-    Flags |= llvm::DILineInfoSpecifier::FunctionName;
-  return Flags;
-}
-
-static void patchFunctionNameInDILineInfo(const std::string &NewFunctionName,
-                                          DILineInfo &LineInfo) {
-  std::string FileName = LineInfo.getFileName();
-  LineInfo = DILineInfo(StringRef(FileName), StringRef(NewFunctionName),
-                        LineInfo.getLine(), LineInfo.getColumn());
-}
-
-namespace {
-class ModuleInfo {
-  OwningPtr<ObjectFile> Module;
-  OwningPtr<DIContext> DebugInfoContext;
- public:
-  ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
-      : Module(Obj), DebugInfoContext(DICtx) {}
-
-  DILineInfo symbolizeCode(uint64_t ModuleOffset) const {
-    DILineInfo LineInfo;
-    if (DebugInfoContext) {
-      LineInfo = DebugInfoContext->getLineInfoForAddress(
-          ModuleOffset, getDILineInfoSpecifierFlags());
-    }
-    // Override function name from symbol table if necessary.
-    if (PrintFunctions && UseSymbolTable) {
-      std::string Function;
-      if (getFunctionNameFromSymbolTable(ModuleOffset, Function)) {
-        patchFunctionNameInDILineInfo(Function, LineInfo);
-      }
-    }
-    return LineInfo;
-  }
-
-  DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset) const {
-    DIInliningInfo InlinedContext;
-    if (DebugInfoContext) {
-      InlinedContext = DebugInfoContext->getInliningInfoForAddress(
-          ModuleOffset, getDILineInfoSpecifierFlags());
-    }
-    // Make sure there is at least one frame in context.
-    if (InlinedContext.getNumberOfFrames() == 0) {
-      InlinedContext.addFrame(DILineInfo());
-    }
-    // Override the function name in lower frame with name from symbol table.
-    if (PrintFunctions && UseSymbolTable) {
-      DIInliningInfo PatchedInlinedContext;
-      for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames();
-           i != n; i++) {
-        DILineInfo LineInfo = InlinedContext.getFrame(i);
-        if (i == n - 1) {
-          std::string Function;
-          if (getFunctionNameFromSymbolTable(ModuleOffset, Function)) {
-            patchFunctionNameInDILineInfo(Function, LineInfo);
-          }
-        }
-        PatchedInlinedContext.addFrame(LineInfo);
-      }
-      InlinedContext = PatchedInlinedContext;
-    }
-    return InlinedContext;
-  }
-
- private:
-  bool getFunctionNameFromSymbolTable(uint64_t Address,
-                                      std::string &FunctionName) const {
-    assert(Module);
-    error_code ec;
-    for (symbol_iterator si = Module->begin_symbols(),
-                         se = Module->end_symbols();
-                         si != se; si.increment(ec)) {
-      if (error(ec)) return false;
-      uint64_t SymbolAddress;
-      uint64_t SymbolSize;
-      SymbolRef::Type SymbolType;
-      if (error(si->getAddress(SymbolAddress)) ||
-          SymbolAddress == UnknownAddressOrSize) continue;
-      if (error(si->getSize(SymbolSize)) ||
-          SymbolSize == UnknownAddressOrSize) continue;
-      if (error(si->getType(SymbolType))) continue;
-      // FIXME: If a function has alias, there are two entries in symbol table
-      // with same address size. Make sure we choose the correct one.
-      if (SymbolAddress <= Address && Address < SymbolAddress + SymbolSize &&
-          SymbolType == SymbolRef::ST_Function) {
-        StringRef Name;
-        if (error(si->getName(Name))) continue;
-        FunctionName = Name.str();
-        return true;
-      }
-    }
-    return false;
-  }
-};
-
-typedef std::map<std::string, ModuleInfo*> ModuleMapTy;
-typedef ModuleMapTy::iterator ModuleMapIter;
-}  // namespace
-
-static ModuleMapTy Modules;
-
-static bool isFullNameOfDwarfSection(const StringRef &FullName,
-                                     const StringRef &ShortName) {
-  static const char kDwarfPrefix[] = "__DWARF,";
-  StringRef Name = FullName;
-  // Skip "__DWARF," prefix.
-  if (Name.startswith(kDwarfPrefix))
-    Name = Name.substr(strlen(kDwarfPrefix));
-  // Skip . and _ prefixes.
-  Name = Name.substr(Name.find_first_not_of("._"));
-  return (Name == ShortName);
-}
-
-// Returns true if the object endianness is known.
-static bool getObjectEndianness(const ObjectFile *Obj,
-                                bool &IsLittleEndian) {
-  // FIXME: Implement this when libLLVMObject allows to do it easily.
-  IsLittleEndian = true;
-  return true;
-}
-
-static void getDebugInfoSections(const ObjectFile *Obj,
-                                 StringRef &DebugInfoSection,
-                                 StringRef &DebugAbbrevSection,
-                                 StringRef &DebugLineSection,
-                                 StringRef &DebugArangesSection, 
-                                 StringRef &DebugStringSection, 
-                                 StringRef &DebugRangesSection) {
-  if (Obj == 0)
-    return;
-  error_code ec;
-  for (section_iterator i = Obj->begin_sections(),
-                        e = Obj->end_sections();
-                        i != e; i.increment(ec)) {
-    if (error(ec)) break;
-    StringRef Name;
-    if (error(i->getName(Name))) continue;
-    StringRef Data;
-    if (error(i->getContents(Data))) continue;
-    if (isFullNameOfDwarfSection(Name, "debug_info"))
-      DebugInfoSection = Data;
-    else if (isFullNameOfDwarfSection(Name, "debug_abbrev"))
-      DebugAbbrevSection = Data;
-    else if (isFullNameOfDwarfSection(Name, "debug_line"))
-      DebugLineSection = Data;
-    // Don't use debug_aranges for now, as address ranges contained
-    // there may not cover all instructions in the module
-    // else if (isFullNameOfDwarfSection(Name, "debug_aranges"))
-    //   DebugArangesSection = Data;
-    else if (isFullNameOfDwarfSection(Name, "debug_str"))
-      DebugStringSection = Data;
-    else if (isFullNameOfDwarfSection(Name, "debug_ranges"))
-      DebugRangesSection = Data;
-  }
-}
-
-static ObjectFile *getObjectFile(const std::string &Path) {
-  OwningPtr<MemoryBuffer> Buff;
-  MemoryBuffer::getFile(Path, Buff);
-  return ObjectFile::createObjectFile(Buff.take());
-}
-
-static std::string getDarwinDWARFResourceForModule(const std::string &Path) {
-  StringRef Basename = sys::path::filename(Path);
-  const std::string &DSymDirectory = Path + ".dSYM";
-  SmallString<16> ResourceName = StringRef(DSymDirectory);
-  sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
-  sys::path::append(ResourceName, Basename);
-  return ResourceName.str();
-}
-
-static ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName) {
-  ModuleMapIter I = Modules.find(ModuleName);
-  if (I != Modules.end())
-    return I->second;
-
-  ObjectFile *Obj = getObjectFile(ModuleName);
-  if (Obj == 0) {
-    // Module name doesn't point to a valid object file.
-    Modules.insert(make_pair(ModuleName, (ModuleInfo*)0));
-    return 0;
-  }
-
-  DIContext *Context = 0;
-  bool IsLittleEndian;
-  if (getObjectEndianness(Obj, IsLittleEndian)) {
-    StringRef DebugInfo;
-    StringRef DebugAbbrev;
-    StringRef DebugLine;
-    StringRef DebugAranges;
-    StringRef DebugString;
-    StringRef DebugRanges;
-    getDebugInfoSections(Obj, DebugInfo, DebugAbbrev, DebugLine,
-                         DebugAranges, DebugString, DebugRanges);
-    
-    // On Darwin we may find DWARF in separate object file in
-    // resource directory.
-    if (isa<MachOObjectFile>(Obj)) {
-      const std::string &ResourceName = getDarwinDWARFResourceForModule(
-          ModuleName);
-      ObjectFile *ResourceObj = getObjectFile(ResourceName);
-      if (ResourceObj != 0)
-        getDebugInfoSections(ResourceObj, DebugInfo, DebugAbbrev, DebugLine,
-                             DebugAranges, DebugString, DebugRanges);
-    }
-
-    Context = DIContext::getDWARFContext(
-        IsLittleEndian, DebugInfo, DebugAbbrev,
-        DebugAranges, DebugLine, DebugString,
-        DebugRanges);
-    assert(Context);
-  }
-
-  ModuleInfo *Info = new ModuleInfo(Obj, Context);
-  Modules.insert(make_pair(ModuleName, Info));
-  return Info;
-}
-
-// Assume that __cxa_demangle is provided by libcxxabi.
-extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
-                                size_t *length, int *status);
-
-static void printDILineInfo(DILineInfo LineInfo) {
-  // By default, DILineInfo contains "<invalid>" for function/filename it
-  // cannot fetch. We replace it to "??" to make our output closer to addr2line.
-  static const std::string kDILineInfoBadString = "<invalid>";
-  static const std::string kSymbolizerBadString = "??";
-  if (PrintFunctions) {
-    std::string FunctionName = LineInfo.getFunctionName();
-    if (FunctionName == kDILineInfoBadString)
-      FunctionName = kSymbolizerBadString;
-    if (Demangle) {
-      int status = 0;
-      char *DemangledName = __cxa_demangle(
-          FunctionName.c_str(), 0, 0, &status);
-      if (status == 0) {
-        FunctionName = DemangledName;
-        free(DemangledName);
-      }
-    }
-    outs() << FunctionName << "\n";
-  }
-  std::string Filename = LineInfo.getFileName();
-  if (Filename == kDILineInfoBadString)
-    Filename = kSymbolizerBadString;
-  outs() << Filename <<
-         ":" << LineInfo.getLine() <<
-         ":" << LineInfo.getColumn() <<
-         "\n";
-}
-
-static void symbolize(std::string ModuleName, std::string ModuleOffsetStr) {
-  ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
-  uint64_t Offset = 0;
-  if (Info == 0 ||
-      StringRef(ModuleOffsetStr).getAsInteger(0, Offset)) {
-    printDILineInfo(DILineInfo());
-  } else if (PrintInlining) {
-    DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(Offset);
-    uint32_t FramesNum = InlinedContext.getNumberOfFrames();
-    assert(FramesNum > 0);
-    for (uint32_t i = 0; i < FramesNum; i++) {
-      DILineInfo LineInfo = InlinedContext.getFrame(i);
-      printDILineInfo(LineInfo);
-    }
-  } else {
-    DILineInfo LineInfo = Info->symbolizeCode(Offset);
-    printDILineInfo(LineInfo);
-  }
-
-  outs() << "\n";  // Print extra empty line to mark the end of output.
-  outs().flush();
-}
-
-static bool parseModuleNameAndOffset(std::string &ModuleName,
-                                     std::string &ModuleOffsetStr) {
-  static const int kMaxInputStringLength = 1024;
-  static const char kDelimiters[] = " \n";
-  char InputString[kMaxInputStringLength];
-  if (!fgets(InputString, sizeof(InputString), stdin))
-    return false;
-  ModuleName = "";
-  ModuleOffsetStr = "";
-  // FIXME: Handle case when filename is given in quotes.
-  if (char *FilePath = strtok(InputString, kDelimiters)) {
-    ModuleName = FilePath;
-    if (char *OffsetStr = strtok((char*)0, kDelimiters))
-      ModuleOffsetStr = OffsetStr;
-  }
-  return true;
-}
-
-int main(int argc, char **argv) {
-  // Print stack trace if we signal out.
-  sys::PrintStackTraceOnErrorSignal();
-  PrettyStackTraceProgram X(argc, argv);
-  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
-
-  cl::ParseCommandLineOptions(argc, argv, "llvm symbolizer for compiler-rt\n");
-  ToolInvocationPath = argv[0];
-
-  std::string ModuleName;
-  std::string ModuleOffsetStr;
-  while (parseModuleNameAndOffset(ModuleName, ModuleOffsetStr)) {
-    symbolize(ModuleName, ModuleOffsetStr);
-  }
-  return 0;
-}