Merge branch 'upstream/master'
diff --git a/.gitignore b/.gitignore
index 8f99959..7c53431 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 *~
-Debug
-Release
-Profile
+darwin_fat
+clang_darwin
+multi_arch
diff --git a/BlocksRuntime/CMakeLists.txt b/BlocksRuntime/CMakeLists.txt
deleted file mode 100644
index 5b4686b..0000000
--- a/BlocksRuntime/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-PROJECT( BlocksRuntime C )
-
-SET( SRCS
- runtime.c
- data.c
- )
-
-ADD_LIBRARY( ${PROJECT_NAME} SHARED ${SRCS})
-SET_TARGET_PROPERTIES( ${PROJECT_NAME} PROPERTIES
-  INSTALL_NAME_DIR ${CMAKE_INSTALL_PREFIX}/lib )
-
-INSTALL( TARGETS ${PROJECT_NAME} DESTINATION lib )
-INSTALL( FILES Block.h Block_private.h DESTINATION include )
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 483b673..9bdd972 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,55 +1,49 @@
-# See docs/CMake.html for instructions about how to build Compiler-RT with CMake.
+# CMake build for CompilerRT.
+#
+# This build assumes that CompilerRT is checked out into the
+# 'projects/compiler-rt' directory inside of an LLVM tree; it is not a
+# stand-alone build system.
+#
+# An important constraint of the build is that it only produces libraries
+# based on the ability of the host toolchain to target various platforms.
 
-PROJECT( CompilerRT C )
-CMAKE_MINIMUM_REQUIRED( VERSION 2.6 )
+include(LLVMParseArguments)
 
-set(PACKAGE_NAME compiler-rt)
-set(PACKAGE_VERSION 1.0svn)
-set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
-set(PACKAGE_BUGREPORT "llvmbugs@cs.uiuc.edu")
+# FIXME: Below we assume that the target build of LLVM/Clang is x86, which is
+# not at all valid. Much of this can be fixed just by switching to use
+# a just-built-clang binary for the compiles.
 
-SET( CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules )
+# Work out the pointer width of the current target platform and derive from it
+# the extra compiler flag needed to build for the other x86 word size.
+if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+  set(TARGET_I386_CFLAGS "-m32")
+  set(TARGET_X86_64_CFLAGS "")
+elseif(CMAKE_SIZEOF_VOID_P EQUAL 4)
+  set(TARGET_I386_CFLAGS "")
+  set(TARGET_X86_64_CFLAGS "-m64")
+else()
+  # Neither 4- nor 8-byte pointers: configuration cannot continue.
+  message(FATAL_ERROR "Please use a sane architecture with 4 or 8 byte pointers.")
+endif()
 
-# add definitions
-include(DefineCompilerFlags)
+# Try to compile a very simple source file for each x86 target, passing that
+# target's flag to both the compile and the link step. The CAN_TARGET_* results
+# are used to build only those target runtime libraries that our current
+# compiler's cross-compiling abilities support.
+set(SIMPLE_SOURCE64 ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple64.c)
+file(WRITE ${SIMPLE_SOURCE64} "#include <stdlib.h>\nint main() {}")
+try_compile(CAN_TARGET_X86_64 ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE64}
+            COMPILE_DEFINITIONS "${TARGET_X86_64_CFLAGS}"
+            CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${TARGET_X86_64_CFLAGS}")
 
-# Disallow in-source build
-INCLUDE( MacroEnsureOutOfSourceBuild )
-MACRO_ENSURE_OUT_OF_SOURCE_BUILD(
- "${PROJECT_NAME} requires an out of source build. Please create a separate build directory and run 'cmake /path/to/${PROJECT_NAME} [options]' there."
- )
+set(SIMPLE_SOURCE32 ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple32.c)
+file(WRITE ${SIMPLE_SOURCE32} "#include <stdlib.h>\nint main() {}")
+try_compile(CAN_TARGET_I386 ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE32}
+            COMPILE_DEFINITIONS "${TARGET_I386_CFLAGS}"
+            CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${TARGET_I386_CFLAGS}")
 
-INCLUDE( ${CMAKE_SOURCE_DIR}/cmake/ConfigureChecks.cmake )
-CONFIGURE_FILE( ${CMAKE_SOURCE_DIR}/cmake/config.h.cmake
-                ${CMAKE_CURRENT_BINARY_DIR}/config.h )
+add_subdirectory(lib)
 
-INCLUDE_DIRECTORIES(
- ${CMAKE_CURRENT_BINARY_DIR}
-)
-
-SET( Achitectures
- i386 x86_64 ppc arm
- )
-
-SET( Configurations
- Debug Release Profile
- )
-
-# Only build Blocks Runtime if the compiler has enough support
-IF( WIN32 OR MSVC OR HAVE_OSATOMIC_COMPARE_AND_SWAP_INT OR HAVE_SYNC_BOOL_COMPARE_AND_SWAP_INT )
-  SET(BUILD_BLOCKS_RUNTIME TRUE)
-ELSE( WIN32 OR MSVC OR HAVE_OSATOMIC_COMPARE_AND_SWAP_INT OR HAVE_SYNC_BOOL_COMPARE_AND_SWAP_INT )
-  SET(BUILD_BLOCKS_RUNTIME FALSE)
-ENDIF( WIN32 OR MSVC OR HAVE_OSATOMIC_COMPARE_AND_SWAP_INT OR HAVE_SYNC_BOOL_COMPARE_AND_SWAP_INT )
-
-IF( BUILD_BLOCKS_RUNTIME )
-  ADD_SUBDIRECTORY( BlocksRuntime )
-ELSE( BUILD_BLOCKS_RUNTIME )
-  MESSAGE(STATUS "No suitable atomic operation routines detected, skipping Blocks Runtime")
-ENDIF( BUILD_BLOCKS_RUNTIME )
-
-ADD_SUBDIRECTORY( lib )
-
-# Enable Test Suit:
-INCLUDE( MacroAddCheckTest )
-ADD_SUBDIRECTORY( test )
+if(LLVM_INCLUDE_TESTS)
+  add_subdirectory(test)
+endif()
diff --git a/LICENSE.TXT b/LICENSE.TXT
index 2552e8c..14925ca 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -14,7 +14,7 @@
 University of Illinois/NCSA
 Open Source License
 
-Copyright (c) 2009-2010 by the contributors listed in CREDITS.TXT
+Copyright (c) 2009-2012 by the contributors listed in CREDITS.TXT
 
 All rights reserved.
 
@@ -55,7 +55,7 @@
 
 ==============================================================================
 
-Copyright (c) 2009-2010 by the contributors listed in CREDITS.TXT
+Copyright (c) 2009-2012 by the contributors listed in CREDITS.TXT
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -74,3 +74,24 @@
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
+
+==============================================================================
+Copyrights and Licenses for Third Party Software Distributed with LLVM:
+==============================================================================
+The LLVM software contains code written by third parties.  Such software will
+have its own individual LICENSE.TXT file in the directory in which it appears.
+This file will describe the copyrights, license, and restrictions which apply
+to that code.
+
+The disclaimer of warranty in the University of Illinois Open Source License
+applies to all code in the LLVM Distribution, and nothing in any of the
+other licenses gives permission to use the names of the LLVM Team or the
+University of Illinois to endorse or promote products derived from this
+Software.
+
+The following pieces of software have additional or alternate copyrights,
+licenses, and/or restrictions:
+
+Program             Directory
+-------             ---------
+mach_override       lib/asan/interception/mach_override
diff --git a/Makefile b/Makefile
index 5305906..642b654 100644
--- a/Makefile
+++ b/Makefile
@@ -164,6 +164,7 @@
 $(call Set,Tmp.Arch,$(1))
 $(call Set,Tmp.ObjPath,$(ProjObjRoot)/$(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch))
 $(call Set,Tmp.Functions,$(strip \
+  $(AlwaysRequiredModules) \
   $(call GetCNAVar,FUNCTIONS,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
 $(call Set,Tmp.Optimized,$(strip \
   $(call GetCNAVar,OPTIMIZED,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
@@ -226,7 +227,10 @@
 	$(Verb) $(Tmp.CC) $(Tmp.CFLAGS) -c -o $$@ $$<
 $(Tmp.ObjPath)/%.o: $(Tmp.SrcPath)/%.c $(Tmp.Dependencies) $(Tmp.ObjPath)/.dir
 	$(Summary) "  COMPILE:   $(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch): $$<"
-	$(Verb) $(Tmp.CC) $(Tmp.CFLAGS) -c -o $$@ $$<
+	$(Verb) $(Tmp.CC) $(Tmp.CFLAGS) -c $(COMMON_CFLAGS) -o $$@ $$<
+$(Tmp.ObjPath)/%.o: $(Tmp.SrcPath)/%.cc $(Tmp.Dependencies) $(Tmp.ObjPath)/.dir
+	$(Summary) "  COMPILE:   $(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch): $$<"
+	$(Verb) $(Tmp.CC) $(Tmp.CFLAGS) -c $(COMMON_CXXFLAGS) -o $$@ $$<
 .PRECIOUS: $(Tmp.ObjPath)/.dir
 
 endef
diff --git a/README.txt b/README.txt
index cbeb10c..b37c0ae 100644
--- a/README.txt
+++ b/README.txt
@@ -106,6 +106,15 @@
 di_int __mulvdi3(di_int a, di_int b);  // a * b
 ti_int __mulvti3(ti_int a, ti_int b);  // a * b
 
+
+// Integral arithmetic which reports whether overflow occurred
+
+si_int __mulosi4(si_int a, si_int b, int* overflow);  // a * b, overflow set to one if result not in signed range
+di_int __mulodi4(di_int a, di_int b, int* overflow);  // a * b, overflow set to one if result not in signed range
+ti_int __muloti4(ti_int a, ti_int b, int* overflow);  // a * b, overflow set to
+                                                      // one if result not in signed range
+
+
 //  Integral comparison: a  < b -> 0
 //                       a == b -> 1
 //                       a  > b -> 2
diff --git a/SDKs/README.txt b/SDKs/README.txt
new file mode 100644
index 0000000..b95575e
--- /dev/null
+++ b/SDKs/README.txt
@@ -0,0 +1,9 @@
+It is often convenient to be able to build compiler-rt libraries for a certain
+platform without having a full SDK or development environment installed.
+
+This makes it easy for users to build a compiler which can target a number of
+different platforms, without having to actively maintain full development
+environments for those platforms.
+
+Since compiler-rt's libraries typically have minimal interaction with the
+system, we achieve this by stubbing out the SDKs of certain platforms.
diff --git a/SDKs/darwin/README.txt b/SDKs/darwin/README.txt
new file mode 100644
index 0000000..ea30af3
--- /dev/null
+++ b/SDKs/darwin/README.txt
@@ -0,0 +1,3 @@
+The Darwin platforms are all similar enough we roll them into one SDK, and use
+preprocessor tricks to get the right definitions for the few things which
+diverge between OS X and iOS.
diff --git a/SDKs/darwin/usr/include/limits.h b/SDKs/darwin/usr/include/limits.h
new file mode 100644
index 0000000..5495a78
--- /dev/null
+++ b/SDKs/darwin/usr/include/limits.h
@@ -0,0 +1,23 @@
+/* ===-- limits.h - stub SDK header for compiler-rt -------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __LIMITS_H__
+#define __LIMITS_H__
+
+/* This is only here as a landing pad for the include_next from the compiler's
+   built-in limits.h. */
+
+#endif /* __LIMITS_H__ */
diff --git a/SDKs/darwin/usr/include/stdio.h b/SDKs/darwin/usr/include/stdio.h
new file mode 100644
index 0000000..3b56036
--- /dev/null
+++ b/SDKs/darwin/usr/include/stdio.h
@@ -0,0 +1,61 @@
+/* ===-- stdio.h - stub SDK header for compiler-rt --------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STDIO_H__
+#define __STDIO_H__
+
+typedef struct __sFILE FILE;
+typedef __SIZE_TYPE__ size_t;
+
+/* Pick the symbol names that fopen() and fwrite() are bound to via __asm. */
+#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
+#  if defined(__i386)
+#    define __FOPEN_NAME "_fopen$UNIX2003"
+#    define __FWRITE_NAME "_fwrite$UNIX2003"
+#  elif defined(__x86_64__)
+#    define __FOPEN_NAME "_fopen"
+#    define __FWRITE_NAME "_fwrite"
+#  elif defined(__arm)
+#    define __FOPEN_NAME "_fopen"
+#    define __FWRITE_NAME "_fwrite"
+#  else
+#    error "unrecognized architecture for targeting OS X"
+#  endif
+#elif defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__)
+#  if defined(__i386) || defined (__x86_64)
+#    define __FOPEN_NAME "_fopen"
+#    define __FWRITE_NAME "_fwrite"
+#  elif defined(__arm)
+#    define __FOPEN_NAME "_fopen"
+#    define __FWRITE_NAME "_fwrite"
+#  else
+#    error "unrecognized architecture for targeting iOS"
+#  endif
+#else /* neither an OS X nor an iOS deployment target was set */
+#  error "unrecognized platform for targeting Darwin"
+#endif
+
+#    define stderr __stderrp
+extern FILE *__stderrp;
+
+int fclose(FILE *);
+int fflush(FILE *);
+FILE *fopen(const char * restrict, const char * restrict) __asm(__FOPEN_NAME);
+int fprintf(FILE * restrict, const char * restrict, ...);
+size_t fwrite(const void * restrict, size_t, size_t, FILE * restrict)
+  __asm(__FWRITE_NAME);
+
+#endif /* __STDIO_H__ */
diff --git a/SDKs/darwin/usr/include/stdlib.h b/SDKs/darwin/usr/include/stdlib.h
new file mode 100644
index 0000000..cf65df4
--- /dev/null
+++ b/SDKs/darwin/usr/include/stdlib.h
@@ -0,0 +1,29 @@
+/* ===-- stdlib.h - stub SDK header for compiler-rt -------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STDLIB_H__
+#define __STDLIB_H__
+
+#define NULL ((void *)0)
+
+typedef __SIZE_TYPE__ size_t;
+
+void abort(void) __attribute__((__noreturn__));
+void free(void *);
+char *getenv(const char *);
+void *malloc(size_t);
+
+#endif /* __STDLIB_H__ */
diff --git a/SDKs/darwin/usr/include/string.h b/SDKs/darwin/usr/include/string.h
new file mode 100644
index 0000000..bee9d46
--- /dev/null
+++ b/SDKs/darwin/usr/include/string.h
@@ -0,0 +1,30 @@
+/* ===-- string.h - stub SDK header for compiler-rt -------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STRING_H__
+#define __STRING_H__
+
+typedef __SIZE_TYPE__ size_t;
+
+int memcmp(const void *, const void *, size_t);
+void *memcpy(void *, const void *, size_t);
+char *strcat(char *, const char *);
+char *strcpy(char *, const char *);
+char *strdup(const char *);
+size_t strlen(const char *);
+char *strncpy(char *, const char *, size_t);
+
+#endif /* __STRING_H__ */
diff --git a/SDKs/darwin/usr/include/sys/stat.h b/SDKs/darwin/usr/include/sys/stat.h
new file mode 100644
index 0000000..6225f90
--- /dev/null
+++ b/SDKs/darwin/usr/include/sys/stat.h
@@ -0,0 +1,25 @@
+/* ===-- stat.h - stub SDK header for compiler-rt ---------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __SYS_STAT_H__
+#define __SYS_STAT_H__
+
+typedef unsigned short uint16_t;
+typedef uint16_t mode_t;
+
+int mkdir(const char *, mode_t);
+
+#endif /* __SYS_STAT_H__ */
diff --git a/SDKs/darwin/usr/include/sys/types.h b/SDKs/darwin/usr/include/sys/types.h
new file mode 100644
index 0000000..b425767
--- /dev/null
+++ b/SDKs/darwin/usr/include/sys/types.h
@@ -0,0 +1,20 @@
+/* ===-- types.h - stub SDK header for compiler-rt --------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __SYS_TYPES_H__
+#define __SYS_TYPES_H__
+
+#endif /* __SYS_TYPES_H__ */
diff --git a/SDKs/linux/README.txt b/SDKs/linux/README.txt
new file mode 100644
index 0000000..aa0604a
--- /dev/null
+++ b/SDKs/linux/README.txt
@@ -0,0 +1,2 @@
+This is a stub SDK for Linux. Currently, this has only been tested on i386 and
+x86_64 using the Clang compiler.
diff --git a/SDKs/linux/usr/include/endian.h b/SDKs/linux/usr/include/endian.h
new file mode 100644
index 0000000..95528db
--- /dev/null
+++ b/SDKs/linux/usr/include/endian.h
@@ -0,0 +1,29 @@
+/* ===-- endian.h - stub SDK header for compiler-rt -------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __ENDIAN_H__
+#define __ENDIAN_H__
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+#if defined(__LITTLE_ENDIAN__) || \
+    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#else /* big-endian target, or a compiler that predefines neither macro */
+#define __BYTE_ORDER __BIG_ENDIAN
+#endif
+
+#endif /* __ENDIAN_H__ */
diff --git a/SDKs/linux/usr/include/limits.h b/SDKs/linux/usr/include/limits.h
new file mode 100644
index 0000000..5495a78
--- /dev/null
+++ b/SDKs/linux/usr/include/limits.h
@@ -0,0 +1,23 @@
+/* ===-- limits.h - stub SDK header for compiler-rt -------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __LIMITS_H__
+#define __LIMITS_H__
+
+/* This is only here as a landing pad for the include_next from the compiler's
+   built-in limits.h. */
+
+#endif /* __LIMITS_H__ */
diff --git a/SDKs/linux/usr/include/stdio.h b/SDKs/linux/usr/include/stdio.h
new file mode 100644
index 0000000..ddfe755
--- /dev/null
+++ b/SDKs/linux/usr/include/stdio.h
@@ -0,0 +1,35 @@
+/* ===-- stdio.h - stub SDK header for compiler-rt --------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STDIO_H__
+#define __STDIO_H__
+
+typedef __SIZE_TYPE__ size_t;
+
+struct _IO_FILE;
+typedef struct _IO_FILE FILE;
+
+extern struct _IO_FILE *stdin;
+extern struct _IO_FILE *stdout;
+extern struct _IO_FILE *stderr;
+
+extern int fclose(FILE *);
+extern int fflush(FILE *);
+extern FILE *fopen(const char * restrict, const char * restrict);
+extern int fprintf(FILE * restrict, const char * restrict, ...);
+extern size_t fwrite(const void * restrict, size_t, size_t, FILE * restrict);
+
+#endif /* __STDIO_H__ */
diff --git a/SDKs/linux/usr/include/stdlib.h b/SDKs/linux/usr/include/stdlib.h
new file mode 100644
index 0000000..b3755df
--- /dev/null
+++ b/SDKs/linux/usr/include/stdlib.h
@@ -0,0 +1,32 @@
+/* ===-- stdlib.h - stub SDK header for compiler-rt -------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STDLIB_H__
+#define __STDLIB_H__
+
+#define NULL ((void *)0)
+
+typedef __SIZE_TYPE__ size_t;
+
+void abort(void) __attribute__((__nothrow__)) __attribute__((__noreturn__));
+void free(void *) __attribute__((__nothrow__));
+char *getenv(const char *) __attribute__((__nothrow__))
+  __attribute__((__nonnull__(1)))
+  __attribute__((__warn_unused_result__));
+void *malloc(size_t) __attribute__((__nothrow__)) __attribute__((__malloc__))
+     __attribute__((__warn_unused_result__));
+
+#endif /* __STDLIB_H__ */
diff --git a/SDKs/linux/usr/include/string.h b/SDKs/linux/usr/include/string.h
new file mode 100644
index 0000000..bee9d46
--- /dev/null
+++ b/SDKs/linux/usr/include/string.h
@@ -0,0 +1,30 @@
+/* ===-- string.h - stub SDK header for compiler-rt -------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __STRING_H__
+#define __STRING_H__
+
+typedef __SIZE_TYPE__ size_t;
+
+int memcmp(const void *, const void *, size_t);
+void *memcpy(void *, const void *, size_t);
+char *strcat(char *, const char *);
+char *strcpy(char *, const char *);
+char *strdup(const char *);
+size_t strlen(const char *);
+char *strncpy(char *, const char *, size_t);
+
+#endif /* __STRING_H__ */
diff --git a/SDKs/linux/usr/include/sys/mman.h b/SDKs/linux/usr/include/sys/mman.h
new file mode 100644
index 0000000..7c4d051
--- /dev/null
+++ b/SDKs/linux/usr/include/sys/mman.h
@@ -0,0 +1,29 @@
+/* ===-- mman.h - stub SDK header for compiler-rt ---------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __SYS_MMAN_H__
+#define __SYS_MMAN_H__
+
+typedef __SIZE_TYPE__ size_t;
+
+#define PROT_READ 0x1
+#define PROT_WRITE 0x2
+#define PROT_EXEC 0x4
+
+extern int mprotect (void *__addr, size_t __len, int __prot)
+  __attribute__((__nothrow__));
+
+#endif /* __SYS_MMAN_H__ */
diff --git a/SDKs/linux/usr/include/sys/stat.h b/SDKs/linux/usr/include/sys/stat.h
new file mode 100644
index 0000000..0449fdd
--- /dev/null
+++ b/SDKs/linux/usr/include/sys/stat.h
@@ -0,0 +1,24 @@
+/* ===-- stat.h - stub SDK header for compiler-rt ---------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __SYS_STAT_H__
+#define __SYS_STAT_H__
+
+typedef unsigned int mode_t;
+
+int mkdir(const char *, mode_t);
+
+#endif /* __SYS_STAT_H__ */
diff --git a/SDKs/linux/usr/include/sys/types.h b/SDKs/linux/usr/include/sys/types.h
new file mode 100644
index 0000000..10e74bb
--- /dev/null
+++ b/SDKs/linux/usr/include/sys/types.h
@@ -0,0 +1,20 @@
+/* ===-- types.h - stub SDK header for compiler-rt --------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __SYS_TYPES_H__
+#define __SYS_TYPES_H__
+
+#endif /* __SYS_TYPES_H__ */
diff --git a/SDKs/linux/usr/include/unistd.h b/SDKs/linux/usr/include/unistd.h
new file mode 100644
index 0000000..773b081
--- /dev/null
+++ b/SDKs/linux/usr/include/unistd.h
@@ -0,0 +1,26 @@
+/* ===-- unistd.h - stub SDK header for compiler-rt -------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This is a stub SDK header file. This file is not part of the interface of
+ * this library nor an official version of the appropriate SDK header. It is
+ * intended only to stub the features of this header required by compiler-rt.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef __UNISTD_H__
+#define __UNISTD_H__
+
+enum {
+  _SC_PAGESIZE = 30
+};
+
+extern long int sysconf (int __name) __attribute__ ((__nothrow__));
+
+#endif /* __UNISTD_H__ */
diff --git a/cmake/ConfigureChecks.cmake b/cmake/ConfigureChecks.cmake
deleted file mode 100644
index b72a390..0000000
--- a/cmake/ConfigureChecks.cmake
+++ /dev/null
@@ -1,38 +0,0 @@
-INCLUDE( CheckIncludeFile )
-INCLUDE( CheckFunctionExists )
-INCLUDE( CheckSymbolExists )
-INCLUDE( CheckCSourceCompiles )
-
-SET( PACKAGE ${PACKAGE_NAME} )
-SET( VERSION ${PACKAGE_VERSION} )
-
-SET( BINARYDIR ${CMAKE_BINARY_DIR} )
-SET( SOURCEDIR ${CMAKE_SOURCE_DIR} )
-
-# HEADER FILES
-CHECK_INCLUDE_FILE( sys/byteorder.h HAVE_SYS_BYTEORDER_H )
-CHECK_INCLUDE_FILE( AvailabilityMacros.h HAVE_AVAILABILITY_MACROS_H )
-CHECK_INCLUDE_FILE( TargetConditionals.h HAVE_TARGET_CONDITIONALS_H )
-CHECK_INCLUDE_FILE( libkern/OSAtomic.h HAVE_LIBKERN_OSATOMIC_H )
-
-# FUNCTIONS
-CHECK_FUNCTION_EXISTS( sysconf HAVE_SYSCONF )
-CHECK_SYMBOL_EXISTS( OSAtomicCompareAndSwapInt libkern/OSAtomic.h HAVE_OSATOMIC_COMPARE_AND_SWAP_INT )
-CHECK_SYMBOL_EXISTS( OSAtomicCompareAndSwapLong libkern/OSAtomic.h HAVE_OSATOMIC_COMPARE_AND_SWAP_LONG )
-
-# BUILTIN
-CHECK_C_SOURCE_COMPILES( "
-volatile int a;
-int main(int argc, char *argv[]) {
-  (void)__sync_bool_compare_and_swap(&a, 1, 2);
-  return 0;
-}
-" HAVE_SYNC_BOOL_COMPARE_AND_SWAP_INT )
-
-CHECK_C_SOURCE_COMPILES( "
-volatile long a;
-int main(int argc, char *argv[]) {
-  (void)__sync_bool_compare_and_swap(&a, 1, 2);
-  return 0;
-}
-" HAVE_SYNC_BOOL_COMPARE_AND_SWAP_LONG )
diff --git a/cmake/Modules/DefineCompilerFlags.cmake b/cmake/Modules/DefineCompilerFlags.cmake
deleted file mode 100644
index 9e262b9..0000000
--- a/cmake/Modules/DefineCompilerFlags.cmake
+++ /dev/null
@@ -1,6 +0,0 @@
-# Define compiler flags
-
-if( CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX )
-  #ADD_DEFINITIONS( -Wall -W -Werror -pedantic )
-  ADD_DEFINITIONS( -std=c99 -Wall -Wextra -W -pedantic -Wno-unused-parameter )
-endif( CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX )
diff --git a/cmake/Modules/MacroAddCheckTest.cmake b/cmake/Modules/MacroAddCheckTest.cmake
deleted file mode 100644
index a139121..0000000
--- a/cmake/Modules/MacroAddCheckTest.cmake
+++ /dev/null
@@ -1,12 +0,0 @@
-# - macro_add_check_test(test_name test_source linklib1 ... linklibN)
-
-ENABLE_TESTING()
-include(CTest)
-set(CMAKE_C_FLAGS_PROFILING "-g -pg")
-
-macro (MACRO_ADD_CHECK_TEST _testName _testSource)
-  add_executable(${_testName} ${_testSource})
-  target_link_libraries(${_testName} ${ARGN})
-  get_target_property(_targetLocation ${_testName} LOCATION) 
-  add_test(${_testName} ${_targetLocation})
-endmacro (MACRO_ADD_CHECK_TEST)
diff --git a/cmake/Modules/MacroEnsureOutOfSourceBuild.cmake b/cmake/Modules/MacroEnsureOutOfSourceBuild.cmake
deleted file mode 100644
index a066936..0000000
--- a/cmake/Modules/MacroEnsureOutOfSourceBuild.cmake
+++ /dev/null
@@ -1,18 +0,0 @@
-# MACRO_ENSURE_OUT_OF_SOURCE_BUILD(<errorMessage>)
-
-macro( MACRO_ENSURE_OUT_OF_SOURCE_BUILD _errorMessage )
-
-string( COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" _insource )
-if( _insource )
- message( SEND_ERROR "${_errorMessage}" )
- message( FATAL_ERROR
- "In-source builds are not allowed.
- CMake would overwrite the makefiles distributed with Compiler-RT.
- Please create a directory and run cmake from there, passing the path
- to this source directory as the last argument.
- This process created the file `CMakeCache.txt' and the directory `CMakeFiles'.
- Please delete them."
- )
-endif( _insource )
-
-endmacro( MACRO_ENSURE_OUT_OF_SOURCE_BUILD )
diff --git a/cmake/config.h.cmake b/cmake/config.h.cmake
deleted file mode 100644
index 307e277..0000000
--- a/cmake/config.h.cmake
+++ /dev/null
@@ -1,12 +0,0 @@
-#cmakedefine HAVE_SYS_BYTEORDER_H ${HAVE_SYS_BYTEORDER}
-#cmakedefine HAVE_AVAILABILITY_MACROS_H ${HAVE_AVAILABILITY_MACROS_H}
-#cmakedefine HAVE_TARGET_CONDITIONALS_H ${HAVE_TARGET_CONDITIONALS_H}
-#cmakedefine HAVE_LIBKERN_OSATOMIC_H ${HAVE_LIBKERN_OSATOMIC_H}
-
-#cmakedefine HAVE_SYSCONF ${HAVE_SYSCONF}
-
-#cmakedefine HAVE_OSATOMIC_COMPARE_AND_SWAP_INT ${HAVE_OSATOMIC_COMPARE_AND_SWAP_INT}
-#cmakedefine HAVE_OSATOMIC_COMPARE_AND_SWAP_LONG ${HAVE_OSATOMIC_COMPARE_AND_SWAP_LONG}
-
-#cmakedefine HAVE_SYNC_BOOL_COMPARE_AND_SWAP_INT ${HAVE_SYNC_BOOL_COMPARE_AND_SWAP_INT}
-#cmakedefine HAVE_SYNC_BOOL_COMPARE_AND_SWAP_LONG ${HAVE_SYNC_BOOL_COMPARE_AND_SWAP_LONG}
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index e29474a..433e0af 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -1,81 +1,28 @@
-#
-# Create a library called "CompilerRT" which includes the source files.
+# The top-level lib directory contains a large amount of C code which provides
+# generic implementations of the core runtime library along with optimized
+# architecture-specific code in various subdirectories.
 
-#INCLUDE_DIRECTORIES(
-# ${CMAKE_CURRENT_BINARY_DIR}
-#)
+file(GLOB GENERIC_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.c")  # every generic C implementation in this directory
 
-# Generic functions needed for each architecture
-# libcompiler_rt.Generic.a    libcompiler_rt.Optimized.a
+# FIXME: We don't currently support building an atomic library, and as it must
+# be a separate library from the runtime library, we need to remove its source
+# code from the glob.
+file(GLOB ATOMIC "${CMAKE_CURRENT_SOURCE_DIR}/atomic.c")  # globbed so the path has the same form as the GENERIC_SOURCES entries
+list(REMOVE_ITEM GENERIC_SOURCES "${ATOMIC}")  # quoted: an empty ATOMIC (no atomic.c) must not become a usage error
 
-# Generic
-SET( Generic_SRCS
- absvdi2.c absvsi2.c addvdi3.c addvsi3.c ashldi3.c ashrdi3.c
- clzdi2.c clzsi2.c cmpdi2.c ctzdi2.c ctzsi2.c
- divdc3.c divdi3.c divsc3.c ffsdi2.c
- fixdfdi.c fixsfdi.c fixunsdfdi.c fixunsdfsi.c fixunssfdi.c
- fixunssfsi.c floatdidf.c floatdisf.c floatundidf.c floatundisf.c
- gcc_personality_v0.c lshrdi3.c moddi3.c muldc3.c muldi3.c
- mulsc3.c mulvdi3.c mulvsi3.c negdi2.c negvdi2.c negvsi2.c
- paritydi2.c paritysi2.c popcountdi2.c popcountsi2.c powidf2.c
- powisf2.c subvdi3.c subvsi3.c ucmpdi2.c udivdi3.c
- udivmoddi4.c umoddi3.c apple_versioning.c eprintf.c
- )
+if(CAN_TARGET_X86_64)  # x86_64 runtime: optimized sources from x86_64/ plus the generic C code
+  file(GLOB X86_64_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/x86_64/*.c" "${CMAKE_CURRENT_SOURCE_DIR}/x86_64/*.S")  # arch dir only; re-globbing this directory would pull atomic.c back in
+  add_library(clang_rt.x86_64 STATIC ${X86_64_SOURCES} ${GENERIC_SOURCES})  # NOTE(review): arch files are listed first; confirm they win over same-named generic symbols at link time
+  set_target_properties(clang_rt.x86_64 PROPERTIES COMPILE_FLAGS "${TARGET_X86_64_CFLAGS}")
+endif()
+if(CAN_TARGET_I386)  # i386 runtime: optimized sources from i386/ plus the generic C code
+  file(GLOB I386_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/i386/*.c" "${CMAKE_CURRENT_SOURCE_DIR}/i386/*.S")  # arch dir only; re-globbing this directory would pull atomic.c back in
+  add_library(clang_rt.i386 STATIC ${I386_SOURCES} ${GENERIC_SOURCES})  # NOTE(review): arch files are listed first; confirm they win over same-named generic symbols at link time
+  set_target_properties(clang_rt.i386 PROPERTIES COMPILE_FLAGS "${TARGET_I386_CFLAGS}")
+endif()
 
-# Optimized functions for each architecture
+# Also support building feature-based runtime libraries in their various
+# subdirectories.
+add_subdirectory(asan)
 
-# Commenting out for the min until the basics are working first.
-# ADD_SUBDIRECTORY( ppc )
-# ADD_SUBDIRECTORY( x86_64 )
-# ADD_SUBDIRECTORY( i386 )
-# ADD_SUBDIRECTORY( arm )
-
-# List of functions needed for each architecture.
-SET( i386_Functions
- divxc3.c fixunsxfdi.c fixunsxfsi.c fixxfdi.c floatdixf.c
- floatundixf.c mulxc3.c powixf2.c clear_cache.c enable_execute_stack.c 
- )
-
-SET( x86_64_Functions
- absvti2.c addvti3.c ashlti3.c ashrti3.c clzti2.c cmpti2.c
- ctzti2.c divti3.c divxc3.c ffsti2.c fixdfti.c fixsfti.c
- fixunsdfti.c fixunssfti.c fixunsxfdi.c fixunsxfsi.c
- fixunsxfti.c fixxfdi.c fixxfti.c floatdixf.c floattidf.c
- floattisf.c floattixf.c floatundixf.c floatuntidf.c
- floatuntisf.c floatuntixf.c lshrti3.c modti3.c multi3.c
- mulvti3.c mulxc3.c negti2.c negvti2.c parityti2.c
- popcountti2.c powixf2.c subvti3.c ucmpti2.c udivmodti4.c
- udivti3.c umodti3.c clear_cache.c enable_execute_stack.c
- )
-
-SET( PPC_Functions
- divtc3.c fixtfdi.c fixunstfdi.c floatditf.c floatunditf.c
- gcc_qadd.c gcc_qdiv.c gcc_qmul.c gcc_qsub.c multc3.c
- powitf2.c restFP.c saveFP.c trampoline_setup.c
- clear_cache.c enable_execute_stack.c
- )
-
-SET( ARM_Functions
- adddf3vfp.c addsf3vfp.c bswapdi2.c bswapsi2.c divdf3vfp.c
- divsf3vfp.c eqdf2vfp.c eqsf2vfp.c extendsfdf2vfp.c
- fixdfsivfp.c fixsfsivfp.c fixunsdfsivfp.c fixunssfsivfp.c
- floatsidfvfp.c floatsisfvfp.c floatunssidfvfp.c floatunssisfvfp.c
- gedf2vfp.c gesf2vfp.c gtdf2vfp.c gtsf2vfp.c
- ledf2vfp.c lesf2vfp.c ltdf2vfp.c ltsf2vfp.c
- muldf3vfp.c mulsf3vfp.c
- nedf2vfp.c negdf2vfp.c negsf2vfp.c nesf2vfp.c
- subdf3vfp.c subsf3vfp.c truncdfsf2vfp.c unorddf2vfp.c unordsf2vfp.c
- modsi3.c umodsi3.c udivsi3.c divsi3.c switch.c
- )
-
-#FOREACH( LOOP_VAR ${Achitectures} )
-# See ARCHIVE_OUTPUT_DIRECTORY docs.
-#${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/${LOOP_VAR}
-#ENDFOREACH
-
-ADD_LIBRARY( ${PROJECT_NAME}-Generic STATIC ${Generic_SRCS} )
-#ADD_LIBRARY( ${PROJECT_NAME}-i386 STATIC ${i386_Functions} )
-
-# [[debug|optimized|general]
-#TARGET_LINK_LIBRARIES( ${PROJECT_NAME} ${PROJECT_NAME}-Common optimized ${PROJECT_NAME}-i386 )
-
+# FIXME: Add support for the profile library.
diff --git a/lib/Makefile.mk b/lib/Makefile.mk
index 1ed8c11..37aaa9c 100644
--- a/lib/Makefile.mk
+++ b/lib/Makefile.mk
@@ -7,9 +7,23 @@
 #
 #===------------------------------------------------------------------------===#
 
-SubDirs := i386 ppc x86_64 arm
+ModuleName := builtins
+SubDirs :=
 
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+# Add arch specific optimized implementations.
+SubDirs += i386 ppc x86_64 arm
+
+# Add other submodules.
+SubDirs += asan
+SubDirs += profile
+
+# FIXME: We don't currently support building an atomic library, and as it must
+# be a separate library from the runtime library, we need to remove its source
+# code from the source files list.
+ExcludedSources := atomic.c
+
+# Define the variables for this specific directory.
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(filter-out $(ExcludedSources),$(notdir $(file))))
 ObjNames := $(Sources:%.c=%.o)
 Implementation := Generic
 
diff --git a/lib/abi.h b/lib/abi.h
deleted file mode 100644
index 2534317..0000000
--- a/lib/abi.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* ===------ abi.h - configuration header for compiler-rt  -----------------===
- *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file is a configuration header for compiler-rt.
- * This file is not part of the interface of this library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-#if __ARM_EABI__
-# define ARM_EABI_FNALIAS(aeabi_name, name)         \
-  void __aeabi_##aeabi_name() __attribute__((alias("__" #name)));
-# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
-#else
-# define ARM_EABI_FNALIAS(aeabi_name, name)
-# define COMPILER_RT_ABI
-#endif
diff --git a/lib/absvdi2.c b/lib/absvdi2.c
index 9c5d4a2..682c235 100644
--- a/lib/absvdi2.c
+++ b/lib/absvdi2.c
@@ -11,10 +11,8 @@
  *
  *===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: absolute value */
 
diff --git a/lib/absvsi2.c b/lib/absvsi2.c
index 80a1a78..4812af8 100644
--- a/lib/absvsi2.c
+++ b/lib/absvsi2.c
@@ -11,10 +11,8 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: absolute value */
 
diff --git a/lib/absvti2.c b/lib/absvti2.c
index 9e73a26..8f2bddc 100644
--- a/lib/absvti2.c
+++ b/lib/absvti2.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: absolute value */
 
diff --git a/lib/adddf3.c b/lib/adddf3.c
index 3cc997b..7eb40a1 100644
--- a/lib/adddf3.c
+++ b/lib/adddf3.c
@@ -12,8 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "abi.h"
-
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
 
diff --git a/lib/addsf3.c b/lib/addsf3.c
index 20610ef..e57270a 100644
--- a/lib/addsf3.c
+++ b/lib/addsf3.c
@@ -12,8 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "abi.h"
-
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
diff --git a/lib/addvdi3.c b/lib/addvdi3.c
index 51ad397..db45a27 100644
--- a/lib/addvdi3.c
+++ b/lib/addvdi3.c
@@ -11,10 +11,8 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: a + b */
 
diff --git a/lib/addvsi3.c b/lib/addvsi3.c
index c18f7bd..81f515c 100644
--- a/lib/addvsi3.c
+++ b/lib/addvsi3.c
@@ -11,10 +11,8 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: a + b */
 
diff --git a/lib/addvti3.c b/lib/addvti3.c
index ba220f5..9105c17 100644
--- a/lib/addvti3.c
+++ b/lib/addvti3.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: a + b */
 
diff --git a/lib/arm/CMakeLists.txt b/lib/arm/CMakeLists.txt
deleted file mode 100644
index e69de29..0000000
--- a/lib/arm/CMakeLists.txt
+++ /dev/null
diff --git a/lib/arm/Makefile.mk b/lib/arm/Makefile.mk
index cde97c3..e7bbd7b 100644
--- a/lib/arm/Makefile.mk
+++ b/lib/arm/Makefile.mk
@@ -7,6 +7,7 @@
 #
 #===------------------------------------------------------------------------===#
 
+ModuleName := builtins
 SubDirs := 
 OnlyArchs := armv5 armv6 armv7
 
diff --git a/lib/arm/adddf3vfp.S b/lib/arm/adddf3vfp.S
index cced1e0..c90b0c2 100644
--- a/lib/arm/adddf3vfp.S
+++ b/lib/arm/adddf3vfp.S
@@ -15,10 +15,11 @@
 // Adds two double precision floating point numbers using the Darwin
 // calling convention where double arguments are passsed in GPR pairs
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
-	fmdrr	d6, r0, r1		// move first param from r0/r1 pair into d6
-	fmdrr	d7, r2, r3		// move second param from r2/r3 pair into d7
-	faddd	d6, d6, d7		
-	fmrrd	r0, r1, d6		// move result back to r0/r1 pair
+	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
+	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
+	vadd.f64 d6, d6, d7		// d6 = d6 + d7 (double precision)
+	vmov	r0, r1, d6		// move result back to r0/r1 pair
 	bx	lr
diff --git a/lib/arm/addsf3vfp.S b/lib/arm/addsf3vfp.S
index b747528..43653d5 100644
--- a/lib/arm/addsf3vfp.S
+++ b/lib/arm/addsf3vfp.S
@@ -15,10 +15,11 @@
 // Adds two single precision floating point numbers using the Darwin
 // calling convention where single arguments are passsed in GPRs
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
-	fmsr	s14, r0		// move first param from r0 into float register
-	fmsr	s15, r1		// move second param from r1 into float register
-	fadds	s14, s14, s15
-	fmrs	r0, s14		// move result back to r0
+	vmov	s14, r0		// move first param from r0 into float register
+	vmov	s15, r1		// move second param from r1 into float register
+	vadd.f32 s14, s14, s15	// s14 = s14 + s15 (single precision)
+	vmov	r0, s14		// move result back to r0
 	bx	lr
diff --git a/lib/arm/aeabi_idivmod.S b/lib/arm/aeabi_idivmod.S
new file mode 100644
index 0000000..0237f22
--- /dev/null
+++ b/lib/arm/aeabi_idivmod.S
@@ -0,0 +1,27 @@
+//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) {
+//   int rem, quot;
+//   quot = __divmodsi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+        .syntax unified
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+        push    { lr }                          // save the return address
+        sub     sp, sp, #4                      // reserve one word on the stack for the remainder
+        mov     r2, sp                          // third argument: address of that word
+        bl      SYMBOL_NAME(__divmodsi4)        // quotient is returned in r0
+        ldr     r1, [sp]                        // r1 = remainder stored by the callee
+        add     sp, sp, #4                      // release the remainder slot
+        pop     { pc }                          // return with quot in r0 and rem in r1
diff --git a/lib/arm/aeabi_ldivmod.S b/lib/arm/aeabi_ldivmod.S
new file mode 100644
index 0000000..197c459
--- /dev/null
+++ b/lib/arm/aeabi_ldivmod.S
@@ -0,0 +1,30 @@
+//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int64_t quot, int64_t rem}
+//        __aeabi_ldivmod(int64_t numerator, int64_t denominator) {
+//   int64_t rem, quot;
+//   quot = __divmoddi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+        .syntax unified
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+        push    {r11, lr}                       // save lr; r11 is unused here, presumably pushed to keep sp 8-byte aligned
+        sub     sp, sp, #16                     // [sp] = outgoing stack argument, [sp, #8] = 64-bit remainder slot
+        add     r12, sp, #8                     // r12 = address of the remainder slot
+        str     r12, [sp]                       // pass that address as the stack argument
+        bl      SYMBOL_NAME(__divmoddi4)        // quotient comes back in r0/r1
+        ldr     r2, [sp, #8]                    // r2 = first word of the remainder
+        ldr     r3, [sp, #12]                   // r3 = second word of the remainder
+        add     sp, sp, #16                     // release the scratch area
+        pop     {r11, pc}                       // restore r11 and return
diff --git a/lib/arm/aeabi_memcmp.S b/lib/arm/aeabi_memcmp.S
new file mode 100644
index 0000000..ca29c10
--- /dev/null
+++ b/lib/arm/aeabi_memcmp.S
@@ -0,0 +1,19 @@
+//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//  int __aeabi_memcmp(void *dest, void *src, size_t n) { return memcmp(dest, src, n); }
+
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
+        b       memcmp                  // tail call (bare symbol, no SYMBOL_NAME() wrapper): the memcmp result goes straight back to our caller
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
diff --git a/lib/arm/aeabi_memcpy.S b/lib/arm/aeabi_memcpy.S
new file mode 100644
index 0000000..8b9c7fd
--- /dev/null
+++ b/lib/arm/aeabi_memcpy.S
@@ -0,0 +1,19 @@
+//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//  void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
+
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
+        b       memcpy                  // tail call (bare symbol, no SYMBOL_NAME() wrapper): arguments are already in memcpy order
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
diff --git a/lib/arm/aeabi_memmove.S b/lib/arm/aeabi_memmove.S
new file mode 100644
index 0000000..c94ed2b
--- /dev/null
+++ b/lib/arm/aeabi_memmove.S
@@ -0,0 +1,19 @@
+//===-- aeabi_memmove.S - EABI memmove implementation --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//  void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
+
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
+        b       memmove                 // tail call (bare symbol, no SYMBOL_NAME() wrapper): arguments are already in memmove order
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
diff --git a/lib/arm/aeabi_memset.S b/lib/arm/aeabi_memset.S
new file mode 100644
index 0000000..30ab4ba
--- /dev/null
+++ b/lib/arm/aeabi_memset.S
@@ -0,0 +1,32 @@
+//===-- aeabi_memset.S - EABI memset implementation -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//  void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
+//  void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
+
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
+        mov     r3, r1                  // r3 = n (scratch copy while r1 and r2 are swapped)
+        mov     r1, r2                  // r1 = c: memset takes the fill value second
+        mov     r2, r3                  // r2 = n: memset takes the length third
+        b       memset                  // tail call memset(dest, c, n)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
+        
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
+        mov     r2, r1                  // r2 = n: memset takes the length third
+        mov     r1, #0                  // r1 = 0, the fill value
+        b       memset                  // tail call memset(dest, 0, n)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
+
diff --git a/lib/arm/aeabi_uidivmod.S b/lib/arm/aeabi_uidivmod.S
new file mode 100644
index 0000000..f7e1d2e
--- /dev/null
+++ b/lib/arm/aeabi_uidivmod.S
@@ -0,0 +1,28 @@
+//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { unsigned quot, unsigned rem}
+//        __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
+//   unsigned rem, quot;
+//   quot = __udivmodsi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+        .syntax unified
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+        push    { lr }                          // save the return address
+        sub     sp, sp, #4                      // reserve one word on the stack for the remainder
+        mov     r2, sp                          // third argument: address of that word
+        bl      SYMBOL_NAME(__udivmodsi4)       // quotient is returned in r0
+        ldr     r1, [sp]                        // r1 = remainder stored by the callee
+        add     sp, sp, #4                      // release the remainder slot
+        pop     { pc }                          // return with quot in r0 and rem in r1
diff --git a/lib/arm/aeabi_uldivmod.S b/lib/arm/aeabi_uldivmod.S
new file mode 100644
index 0000000..724049d
--- /dev/null
+++ b/lib/arm/aeabi_uldivmod.S
@@ -0,0 +1,30 @@
+//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { uint64_t quot, uint64_t rem}
+//        __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) {
+//   uint64_t rem, quot;
+//   quot = __udivmoddi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+        .syntax unified
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+        push    {r11, lr}                       // save lr; r11 is unused here, presumably pushed to keep sp 8-byte aligned
+        sub     sp, sp, #16                     // [sp] = outgoing stack argument, [sp, #8] = 64-bit remainder slot
+        add     r12, sp, #8                     // r12 = address of the remainder slot
+        str     r12, [sp]                       // pass that address as the stack argument
+        bl      SYMBOL_NAME(__udivmoddi4)       // quotient comes back in r0/r1
+        ldr     r2, [sp, #8]                    // r2 = first word of the remainder
+        ldr     r3, [sp, #12]                   // r3 = second word of the remainder
+        add     sp, sp, #16                     // release the scratch area
+        pop     {r11, pc}                       // restore r11 and return
diff --git a/lib/arm/divdf3vfp.S b/lib/arm/divdf3vfp.S
index 74ef0ea..52de67f 100644
--- a/lib/arm/divdf3vfp.S
+++ b/lib/arm/divdf3vfp.S
@@ -15,10 +15,11 @@
 // Divides two double precision floating point numbers using the Darwin
 // calling convention where double arguments are passsed in GPR pairs
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
-	fmdrr	d6, r0, r1		// move first param from r0/r1 pair into d6
-	fmdrr	d7, r2, r3		// move second param from r2/r3 pair into d7
-	fdivd	d5, d6, d7		
-	fmrrd	r0, r1, d5		// move result back to r0/r1 pair
+	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
+	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
+	vdiv.f64 d5, d6, d7		// d5 = d6 / d7 (double precision)
+	vmov	r0, r1, d5		// move result back to r0/r1 pair
 	bx	lr
diff --git a/lib/arm/divsf3vfp.S b/lib/arm/divsf3vfp.S
index 9eefcf3..81ba903 100644
--- a/lib/arm/divsf3vfp.S
+++ b/lib/arm/divsf3vfp.S
@@ -15,10 +15,11 @@
 // Divides two single precision floating point numbers using the Darwin
 // calling convention where single arguments are passsed like 32-bit ints.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
-	fmsr	s14, r0		// move first param from r0 into float register
-	fmsr	s15, r1		// move second param from r1 into float register
-	fdivs	s13, s14, s15
-	fmrs	r0, s13		// move result back to r0
+	vmov	s14, r0		// move first param from r0 into float register
+	vmov	s15, r1		// move second param from r1 into float register
+	vdiv.f32 s13, s14, s15	// s13 = s14 / s15 (single precision)
+	vmov	r0, s13		// move result back to r0
 	bx	lr
diff --git a/lib/arm/eqdf2vfp.S b/lib/arm/eqdf2vfp.S
index 2998a76..c41e55a 100644
--- a/lib/arm/eqdf2vfp.S
+++ b/lib/arm/eqdf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where double precision arguments are passsed 
 // like in GPR pairs.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
-	fmdrr	d6, r0, r1	// load r0/r1 pair in double register
-	fmdrr	d7, r2, r3	// load r2/r3 pair in double register
-	fcmpd	d6, d7		
-	fmstat
+	vmov	d6, r0, r1	// load r0/r1 pair in double register
+	vmov	d7, r2, r3	// load r2/r3 pair in double register
+	vcmp.f64 d6, d7		// compare d6 with d7; the result lands in the FPSCR flags
+	vmrs	apsr_nzcv, fpscr	// copy the FPSCR flags into APSR for the conditional moves (was fmstat)
 	moveq	r0, #1		// set result register to 1 if equal
 	movne	r0, #0
 	bx	lr
diff --git a/lib/arm/eqsf2vfp.S b/lib/arm/eqsf2vfp.S
index 927566e..730ef88 100644
--- a/lib/arm/eqsf2vfp.S
+++ b/lib/arm/eqsf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where single precision arguments are passsed 
 // like 32-bit ints
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
-	fmsr	s14, r0     // move from GPR 0 to float register
-	fmsr	s15, r1	    // move from GPR 1 to float register
-	fcmps	s14, s15
-	fmstat
+	vmov	s14, r0     // move from GPR 0 to float register
+	vmov	s15, r1	    // move from GPR 1 to float register
+	vcmp.f32 s14, s15	// compare s14 with s15; the result lands in the FPSCR flags
+	vmrs	apsr_nzcv, fpscr	// copy the FPSCR flags into APSR for the conditional moves (was fmstat)
 	moveq	r0, #1      // set result register to 1 if equal
 	movne	r0, #0
 	bx	lr
diff --git a/lib/arm/extendsfdf2vfp.S b/lib/arm/extendsfdf2vfp.S
index b1aa88e..17a146e 100644
--- a/lib/arm/extendsfdf2vfp.S
+++ b/lib/arm/extendsfdf2vfp.S
@@ -16,9 +16,10 @@
 // Uses Darwin calling convention where a single precision parameter is 
 // passed in a GPR and a double precision result is returned in R0/R1 pair.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
-	fmsr	s15, r0      // load float register from R0
-	fcvtds	d7, s15      // convert single to double
-	fmrrd	r0, r1, d7   // return result in r0/r1 pair
+	vmov	s15, r0      // load float register from R0
+	vcvt.f64.f32 d7, s15 // convert single to double
+	vmov	r0, r1, d7   // return result in r0/r1 pair
 	bx	lr
diff --git a/lib/arm/fixdfsivfp.S b/lib/arm/fixdfsivfp.S
index 0285a17..b7c3299 100644
--- a/lib/arm/fixdfsivfp.S
+++ b/lib/arm/fixdfsivfp.S
@@ -16,9 +16,10 @@
 // Uses Darwin calling convention where a double precision parameter is 
 // passed in GPR register pair.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
-	fmdrr	d7, r0, r1    // load double register from R0/R1
-	ftosizd	s15, d7       // convert double to 32-bit int into s15
-	fmrs	r0, s15	      // move s15 to result register
+	vmov	d7, r0, r1    // load double register from R0/R1
+	vcvt.s32.f64 s15, d7  // convert double to signed 32-bit int (rounding toward zero) into s15
+	vmov	r0, s15	      // move s15 to result register
 	bx	lr
diff --git a/lib/arm/fixsfsivfp.S b/lib/arm/fixsfsivfp.S
index d05ba74..1cea6a4 100644
--- a/lib/arm/fixsfsivfp.S
+++ b/lib/arm/fixsfsivfp.S
@@ -16,9 +16,10 @@
 // Uses Darwin calling convention where a single precision parameter is 
 // passed in a GPR..
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
-	fmsr	s15, r0      // load float register from R0
-	ftosizs	s15, s15     // convert single to 32-bit int into s15
-	fmrs	r0, s15	     // move s15 to result register
+	vmov	s15, r0        // load float register from R0
+	vcvt.s32.f32 s15, s15  // convert single to signed 32-bit int (rounding toward zero) into s15
+	vmov	r0, s15	       // move s15 to result register
 	bx	lr
diff --git a/lib/arm/fixunsdfsivfp.S b/lib/arm/fixunsdfsivfp.S
index ddb703c..54b0359 100644
--- a/lib/arm/fixunsdfsivfp.S
+++ b/lib/arm/fixunsdfsivfp.S
@@ -17,9 +17,10 @@
 // Uses Darwin calling convention where a double precision parameter is 
 // passed in GPR register pair.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
-	fmdrr	d7, r0, r1    // load double register from R0/R1
-	ftouizd	s15, d7       // convert double to 32-bit int into s15
-	fmrs	r0, s15	      // move s15 to result register
+	vmov	d7, r0, r1    // load double register from R0/R1
+	vcvt.u32.f64 s15, d7  // convert double to unsigned 32-bit int (rounding toward zero) into s15
+	vmov	r0, s15	      // move s15 to result register
 	bx	lr
diff --git a/lib/arm/fixunssfsivfp.S b/lib/arm/fixunssfsivfp.S
index afbb64f..12adb52 100644
--- a/lib/arm/fixunssfsivfp.S
+++ b/lib/arm/fixunssfsivfp.S
@@ -17,9 +17,10 @@
 // Uses Darwin calling convention where a single precision parameter is 
 // passed in a GPR..
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
-	fmsr	s15, r0      // load float register from R0
-	ftouizs	s15, s15     // convert single to 32-bit unsigned into s15
-	fmrs	r0, s15	     // move s15 to result register
+	vmov	s15, r0        // load float register from R0
+	vcvt.u32.f32 s15, s15  // convert single to unsigned 32-bit int (rounding toward zero) into s15
+	vmov	r0, s15	       // move s15 to result register
 	bx	lr
diff --git a/lib/arm/floatsidfvfp.S b/lib/arm/floatsidfvfp.S
index fe3366a..e6a1eb3 100644
--- a/lib/arm/floatsidfvfp.S
+++ b/lib/arm/floatsidfvfp.S
@@ -16,9 +16,10 @@
 // Uses Darwin calling convention where a double precision result is 
 // return in GPR register pair.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
-	fmsr	s15, r0		   // move int to float register s15
-	fsitod	d7, s15        // convert 32-bit int in s15 to double in d7
-	fmrrd	r0, r1, d7     // move d7 to result register pair r0/r1
+	vmov	s15, r0        // move int to float register s15
+	vcvt.f64.s32 d7, s15   // convert 32-bit int in s15 to double in d7
+	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
 	bx	lr
diff --git a/lib/arm/floatsisfvfp.S b/lib/arm/floatsisfvfp.S
index 5b41610..0d3a24f 100644
--- a/lib/arm/floatsisfvfp.S
+++ b/lib/arm/floatsisfvfp.S
@@ -16,9 +16,10 @@
 // Uses Darwin calling convention where a single precision result is 
 // return in a GPR..
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
-	fmsr	s15, r0	     // move int to float register s15
-	fsitos	s15, s15     // convert 32-bit int in s15 to float in s15
-	fmrs	r0, s15      // move s15 to result register
+	vmov	s15, r0	       // move int to float register s15
+	vcvt.f32.s32 s15, s15  // convert 32-bit int in s15 to float in s15
+	vmov	r0, s15        // move s15 to result register
 	bx	lr
diff --git a/lib/arm/floatunssidfvfp.S b/lib/arm/floatunssidfvfp.S
index 9b22a6f..770b202 100644
--- a/lib/arm/floatunssidfvfp.S
+++ b/lib/arm/floatunssidfvfp.S
@@ -16,9 +16,10 @@
 // Uses Darwin calling convention where a double precision result is 
 // return in GPR register pair.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
-	fmsr	s15, r0		   // move int to float register s15
-	fuitod	d7, s15        // convert 32-bit int in s15 to double in d7
-	fmrrd	r0, r1, d7     // move d7 to result register pair r0/r1
+	vmov	s15, r0        // move int to float register s15
+	vcvt.f64.u32 d7, s15   // convert unsigned 32-bit int in s15 to double in d7
+	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
 	bx	lr
diff --git a/lib/arm/floatunssisfvfp.S b/lib/arm/floatunssisfvfp.S
index 44d5e93..16b3ffb 100644
--- a/lib/arm/floatunssisfvfp.S
+++ b/lib/arm/floatunssisfvfp.S
@@ -16,9 +16,10 @@
 // Uses Darwin calling convention where a single precision result is 
 // return in a GPR..
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
-	fmsr	s15, r0	     // move int to float register s15
-	fuitos 	s15, s15     // convert 32-bit int in s15 to float in s15
-	fmrs	r0, s15      // move s15 to result register
+	vmov	s15, r0	       // move int to float register s15
+	vcvt.f32.u32 s15, s15  // convert unsigned 32-bit int in s15 to float in s15
+	vmov	r0, s15        // move s15 to result register
 	bx	lr
diff --git a/lib/arm/gedf2vfp.S b/lib/arm/gedf2vfp.S
index 9993f52..55603b8 100644
--- a/lib/arm/gedf2vfp.S
+++ b/lib/arm/gedf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where double precision arguments are passsed 
 // like in GPR pairs.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
-	fmdrr	d6, r0, r1	// load r0/r1 pair in double register
-	fmdrr	d7, r2, r3	// load r2/r3 pair in double register
-	fcmpd	d6, d7		
-	fmstat
+	vmov 	d6, r0, r1	// load r0/r1 pair in double register
+	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	vcmp.f64 d6, d7		// compare d6 with d7; the result lands in the FPSCR flags
+	vmrs	apsr_nzcv, fpscr	// copy the FPSCR flags into APSR for the conditional moves (was fmstat)
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
 	bx	lr
diff --git a/lib/arm/gesf2vfp.S b/lib/arm/gesf2vfp.S
index 9ce1682..02da35c 100644
--- a/lib/arm/gesf2vfp.S
+++ b/lib/arm/gesf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where single precision arguments are passsed 
 // like 32-bit ints
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
-	fmsr	s14, r0	    // move from GPR 0 to float register
-	fmsr	s15, r1	    // move from GPR 1 to float register
-	fcmps	s14, s15
-	fmstat
+	vmov	s14, r0	    // move from GPR 0 to float register
+	vmov	s15, r1	    // move from GPR 1 to float register
+	vcmp.f32 s14, s15	// compare s14 with s15; the result lands in the FPSCR flags
+	vmrs	apsr_nzcv, fpscr	// copy the FPSCR flags into APSR for the conditional moves (was fmstat)
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
 	bx	lr
diff --git a/lib/arm/gtdf2vfp.S b/lib/arm/gtdf2vfp.S
index 8a049c8..b5b1e14 100644
--- a/lib/arm/gtdf2vfp.S
+++ b/lib/arm/gtdf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where double precision arguments are passsed 
 // like in GPR pairs.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
-	fmdrr	d6, r0, r1	// load r0/r1 pair in double register
-	fmdrr	d7, r2, r3	// load r2/r3 pair in double register
-	fcmpd	d6, d7		
-	fmstat
+	vmov 	d6, r0, r1	// load r0/r1 pair in double register
+	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	vcmp.f64 d6, d7		// compare d6 with d7; the result lands in the FPSCR flags
+	vmrs	apsr_nzcv, fpscr	// copy the FPSCR flags into APSR for the conditional moves (was fmstat)
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
 	bx	lr
diff --git a/lib/arm/gtsf2vfp.S b/lib/arm/gtsf2vfp.S
index 1ffe1ec..685a9ce 100644
--- a/lib/arm/gtsf2vfp.S
+++ b/lib/arm/gtsf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where single precision arguments are passsed 
 // like 32-bit ints
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
-	fmsr	s14, r0		// move from GPR 0 to float register
-	fmsr	s15, r1		// move from GPR 1 to float register
-	fcmps	s14, s15
-	fmstat
+	vmov	s14, r0		// move from GPR 0 to float register
+	vmov	s15, r1		// move from GPR 1 to float register
+	vcmp.f32 s14, s15	// compare s14 with s15; the result lands in the FPSCR flags
+	vmrs	apsr_nzcv, fpscr	// copy the FPSCR flags into APSR for the conditional moves (was fmstat)
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
 	bx	lr
diff --git a/lib/arm/ledf2vfp.S b/lib/arm/ledf2vfp.S
index a04d0f2..6e140dd 100644
--- a/lib/arm/ledf2vfp.S
+++ b/lib/arm/ledf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where double precision arguments are passsed 
 // like in GPR pairs.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
-	fmdrr	d6, r0, r1	// load r0/r1 pair in double register
-	fmdrr	d7, r2, r3	// load r2/r3 pair in double register
-	fcmpd	d6, d7		
-	fmstat
+	vmov 	d6, r0, r1	// load r0/r1 pair in double register
+	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	vcmp.f64 d6, d7
+	vmrs	apsr_nzcv, fpscr
 	movls	r0, #1		// set result register to 1 if equal
 	movhi	r0, #0
 	bx	lr
diff --git a/lib/arm/lesf2vfp.S b/lib/arm/lesf2vfp.S
index 3011200..7b28250 100644
--- a/lib/arm/lesf2vfp.S
+++ b/lib/arm/lesf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where single precision arguments are passsed 
 // like 32-bit ints
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
-	fmsr	s14, r0     // move from GPR 0 to float register
-	fmsr	s15, r1     // move from GPR 1 to float register
-	fcmps	s14, s15
-	fmstat
+	vmov	s14, r0     // move from GPR 0 to float register
+	vmov	s15, r1     // move from GPR 1 to float register
+	vcmp.f32 s14, s15
+	vmrs	apsr_nzcv, fpscr
 	movls	r0, #1      // set result register to 1 if equal
 	movhi	r0, #0
 	bx	lr
diff --git a/lib/arm/ltdf2vfp.S b/lib/arm/ltdf2vfp.S
index 87144a8..a09e67a 100644
--- a/lib/arm/ltdf2vfp.S
+++ b/lib/arm/ltdf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where double precision arguments are passsed 
 // like in GPR pairs.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
-	fmdrr	d6, r0, r1	// load r0/r1 pair in double register
-	fmdrr	d7, r2, r3	// load r2/r3 pair in double register
-	fcmpd	d6, d7		
-	fmstat
+	vmov 	d6, r0, r1	// load r0/r1 pair in double register
+	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	vcmp.f64 d6, d7
+	vmrs	apsr_nzcv, fpscr
 	movmi	r0, #1		// set result register to 1 if equal
 	movpl	r0, #0
 	bx	lr
diff --git a/lib/arm/ltsf2vfp.S b/lib/arm/ltsf2vfp.S
index ca06ae2..8c7f9a8 100644
--- a/lib/arm/ltsf2vfp.S
+++ b/lib/arm/ltsf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where single precision arguments are passsed 
 // like 32-bit ints
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
-	fmsr	s14, r0     // move from GPR 0 to float register
-	fmsr	s15, r1     // move from GPR 1 to float register
-	fcmps	s14, s15
-	fmstat
+	vmov	s14, r0     // move from GPR 0 to float register
+	vmov	s15, r1     // move from GPR 1 to float register
+	vcmp.f32 s14, s15
+	vmrs	apsr_nzcv, fpscr
 	movmi	r0, #1      // set result register to 1 if equal
 	movpl	r0, #0
 	bx	lr
diff --git a/lib/arm/muldf3vfp.S b/lib/arm/muldf3vfp.S
index 96bba06..838581e 100644
--- a/lib/arm/muldf3vfp.S
+++ b/lib/arm/muldf3vfp.S
@@ -15,10 +15,11 @@
 // Multiplies two double precision floating point numbers using the Darwin
 // calling convention where double arguments are passsed in GPR pairs
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
-	fmdrr	d6, r0, r1		// move first param from r0/r1 pair into d6
-	fmdrr	d7, r2, r3		// move second param from r2/r3 pair into d7
-	fmuld	d6, d6, d7		
-	fmrrd	r0, r1, d6		// move result back to r0/r1 pair
+	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
+	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
+	vmul.f64 d6, d6, d7		
+	vmov 	r0, r1, d6         // move result back to r0/r1 pair
 	bx	lr
diff --git a/lib/arm/mulsf3vfp.S b/lib/arm/mulsf3vfp.S
index c56991d..ea25913 100644
--- a/lib/arm/mulsf3vfp.S
+++ b/lib/arm/mulsf3vfp.S
@@ -15,10 +15,11 @@
 // Multiplies two single precision floating point numbers using the Darwin
 // calling convention where single arguments are passsed like 32-bit ints.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
-	fmsr	s14, r0		// move first param from r0 into float register
-	fmsr	s15, r1		// move second param from r1 into float register
-	fmuls	s13, s14, s15
-	fmrs	r0, s13		// move result back to r0
+	vmov	s14, r0		// move first param from r0 into float register
+	vmov	s15, r1		// move second param from r1 into float register
+	vmul.f32 s13, s14, s15
+	vmov	r0, s13		// move result back to r0
 	bx	lr
diff --git a/lib/arm/nedf2vfp.S b/lib/arm/nedf2vfp.S
index a02b09c..2167081 100644
--- a/lib/arm/nedf2vfp.S
+++ b/lib/arm/nedf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where double precision arguments are passsed 
 // like in GPR pairs.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
-	fmdrr	d6, r0, r1	// load r0/r1 pair in double register
-	fmdrr	d7, r2, r3	// load r2/r3 pair in double register
-	fcmpd	d6, d7		
-	fmstat
+	vmov 	d6, r0, r1	// load r0/r1 pair in double register
+	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	vcmp.f64 d6, d7		
+	vmrs	apsr_nzcv, fpscr
 	movne	r0, #1		// set result register to 0 if unequal
 	moveq	r0, #0
 	bx	lr
diff --git a/lib/arm/negdf2vfp.S b/lib/arm/negdf2vfp.S
index 100f4fd..64c9b69 100644
--- a/lib/arm/negdf2vfp.S
+++ b/lib/arm/negdf2vfp.S
@@ -15,6 +15,7 @@
 // Returns the negation a double precision floating point numbers using the 
 // Darwin calling convention where double arguments are passsed in GPR pairs.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
 	eor	r1, r1, #-2147483648	// flip sign bit on double in r0/r1 pair
diff --git a/lib/arm/negsf2vfp.S b/lib/arm/negsf2vfp.S
index f96c8ad..b883b73 100644
--- a/lib/arm/negsf2vfp.S
+++ b/lib/arm/negsf2vfp.S
@@ -15,6 +15,7 @@
 // Returns the negation of a single precision floating point numbers using the 
 // Darwin calling convention where single arguments are passsed like 32-bit ints
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
 	eor	r0, r0, #-2147483648	// flip sign bit on float in r0
diff --git a/lib/arm/nesf2vfp.S b/lib/arm/nesf2vfp.S
index d620549..fa7aa80 100644
--- a/lib/arm/nesf2vfp.S
+++ b/lib/arm/nesf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where single precision arguments are passsed 
 // like 32-bit ints
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
-	fmsr	s14, r0	    // move from GPR 0 to float register
-	fmsr	s15, r1	    // move from GPR 1 to float register
-	fcmps	s14, s15
-	fmstat
+	vmov	s14, r0	    // move from GPR 0 to float register
+	vmov	s15, r1	    // move from GPR 1 to float register
+	vcmp.f32 s14, s15
+	vmrs	apsr_nzcv, fpscr
 	movne	r0, #1      // set result register to 1 if unequal
 	moveq	r0, #0
 	bx	lr
diff --git a/lib/arm/subdf3vfp.S b/lib/arm/subdf3vfp.S
index ff53b30..3f88baa 100644
--- a/lib/arm/subdf3vfp.S
+++ b/lib/arm/subdf3vfp.S
@@ -15,10 +15,11 @@
 // Returns difference between two double precision floating point numbers using 
 // the Darwin calling convention where double arguments are passsed in GPR pairs
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
-	fmdrr	d6, r0, r1		// move first param from r0/r1 pair into d6
-	fmdrr	d7, r2, r3		// move second param from r2/r3 pair into d7
-	fsubd	d6, d6, d7		
-	fmrrd	r0, r1, d6		// move result back to r0/r1 pair
+	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
+	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
+	vsub.f64 d6, d6, d7		
+	vmov 	r0, r1, d6         // move result back to r0/r1 pair
 	bx	lr
diff --git a/lib/arm/subsf3vfp.S b/lib/arm/subsf3vfp.S
index 238f3f0..ed02ba9 100644
--- a/lib/arm/subsf3vfp.S
+++ b/lib/arm/subsf3vfp.S
@@ -16,10 +16,11 @@
 // using the Darwin calling convention where single arguments are passsed
 // like 32-bit ints.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
-	fmsr	s14, r0		// move first param from r0 into float register
-	fmsr	s15, r1		// move second param from r1 into float register
-	fsubs	s14, s14, s15
-	fmrs	r0, s14		// move result back to r0
+	vmov	s14, r0		// move first param from r0 into float register
+	vmov	s15, r1		// move second param from r1 into float register
+	vsub.f32 s14, s14, s15
+	vmov	r0, s14		// move result back to r0
 	bx	lr
diff --git a/lib/arm/truncdfsf2vfp.S b/lib/arm/truncdfsf2vfp.S
index 6e55c7f..371aee9 100644
--- a/lib/arm/truncdfsf2vfp.S
+++ b/lib/arm/truncdfsf2vfp.S
@@ -16,9 +16,10 @@
 // Uses Darwin calling convention where a double precision parameter is 
 // passed in a R0/R1 pair and a signle precision result is returned in R0.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
-	fmdrr	d7, r0, r1   // load double from r0/r1 pair
-	fcvtsd	s15, d7      // convert double to single (trucate precision)
-	fmrs	r0, s15      // return result in r0
+	vmov 	d7, r0, r1   // load double from r0/r1 pair
+	vcvt.f32.f64 s15, d7 // convert double to single (truncate precision)
+	vmov 	r0, s15      // return result in r0
 	bx	lr
diff --git a/lib/arm/unorddf2vfp.S b/lib/arm/unorddf2vfp.S
index 9b52131..c49e55f 100644
--- a/lib/arm/unorddf2vfp.S
+++ b/lib/arm/unorddf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where double precision arguments are passsed 
 // like in GPR pairs.
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
-	fmdrr	d6, r0, r1	// load r0/r1 pair in double register
-	fmdrr	d7, r2, r3	// load r2/r3 pair in double register
-	fcmpd	d6, d7		
-	fmstat
+	vmov 	d6, r0, r1	// load r0/r1 pair in double register
+	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	vcmp.f64 d6, d7		
+	vmrs	apsr_nzcv, fpscr
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
 	bx	lr
diff --git a/lib/arm/unordsf2vfp.S b/lib/arm/unordsf2vfp.S
index e486533..0ab27ed 100644
--- a/lib/arm/unordsf2vfp.S
+++ b/lib/arm/unordsf2vfp.S
@@ -16,12 +16,13 @@
 // Uses Darwin calling convention where single precision arguments are passsed 
 // like 32-bit ints
 //
+	.syntax unified
 	.align 2
 DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
-	fmsr	s14, r0     // move from GPR 0 to float register
-	fmsr	s15, r1	    // move from GPR 1 to float register
-	fcmps	s14, s15
-	fmstat
+	vmov	s14, r0     // move from GPR 0 to float register
+	vmov	s15, r1	    // move from GPR 1 to float register
+	vcmp.f32 s14, s15
+	vmrs	apsr_nzcv, fpscr
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
 	bx	lr
diff --git a/lib/asan/CMakeLists.txt b/lib/asan/CMakeLists.txt
new file mode 100644
index 0000000..76bb1d8
--- /dev/null
+++ b/lib/asan/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Build for the AddressSanitizer runtime support library.
+
+file(GLOB ASAN_SOURCES "*.cc")
+
+if(CAN_TARGET_X86_64)
+  add_library(clang_rt.asan-x86_64 STATIC ${ASAN_SOURCES})
+  set_target_properties(clang_rt.asan-x86_64 PROPERTIES COMPILE_FLAGS "${TARGET_X86_64_CFLAGS}")
+endif()
+if(CAN_TARGET_I386)
+  add_library(clang_rt.asan-i386 STATIC ${ASAN_SOURCES})
+  set_target_properties(clang_rt.asan-i386 PROPERTIES COMPILE_FLAGS "${TARGET_I386_CFLAGS}")
+endif()
diff --git a/lib/asan/Makefile.mk b/lib/asan/Makefile.mk
new file mode 100644
index 0000000..392d621
--- /dev/null
+++ b/lib/asan/Makefile.mk
@@ -0,0 +1,24 @@
+#===- lib/asan/Makefile.mk ---------------------------------*- Makefile -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := asan
+SubDirs := interception
+
+Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
+ObjNames := $(Sources:%.cc=%.o)
+
+Implementation := Generic
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard $(Dir)/*.h)
+Dependencies += $(wildcard $(Dir)/interception/*.h)
+Dependencies += $(wildcard $(Dir)/interception/mach_override/*.h)
+
+# Define a convenience variable for all the asan functions.
+AsanFunctions := $(Sources:%.cc=%)
diff --git a/lib/asan/Makefile.old b/lib/asan/Makefile.old
new file mode 100644
index 0000000..fe57480
--- /dev/null
+++ b/lib/asan/Makefile.old
@@ -0,0 +1,356 @@
+#===- lib/asan/Makefile.old --------------------------------*- Makefile -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+OS=$(shell uname | tr '[A-Z]' '[a-z]')
+ROOT=$(shell pwd)
+MAKEFILE=Makefile.old  # this file.
+
+ifeq ($(ARCH), android)
+  ANDROID_CFLAGS= \
+		-DANDROID \
+		-D__WORDSIZE=32 \
+		-I$(ANDROID_BUILD_TOP)/external/stlport/stlport \
+		-I$(ANDROID_BUILD_TOP)/bionic \
+		-I$(ANDROID_BUILD_TOP)/bionic/libstdc++/include \
+		-I$(ANDROID_BUILD_TOP)/bionic/libc/arch-arm/include \
+		-I$(ANDROID_BUILD_TOP)/bionic/libc/include \
+		-I$(ANDROID_BUILD_TOP)/bionic/libc/kernel/common \
+		-I$(ANDROID_BUILD_TOP)/bionic/libc/kernel/arch-arm \
+		-I$(ANDROID_BUILD_TOP)/bionic/libm/include \
+		-I$(ANDROID_BUILD_TOP)/bionic/libm/include/arm \
+		-I$(ANDROID_BUILD_TOP)/bionic/libthread_db/include \
+		-L$(ANDROID_PRODUCT_OUT)/obj/lib
+  CLANG_FLAGS= \
+		-ccc-host-triple arm-linux-androideabi \
+		-D__compiler_offsetof=__builtin_offsetof \
+		-D__ELF__=1 \
+		-ccc-gcc-name arm-linux-androideabi-g++ \
+		$(ANDROID_CFLAGS)
+  CC=$(ANDROID_EABI_TOOLCHAIN)/arm-linux-androideabi-gcc $(ANDROID_CFLAGS)
+  CXX=$(ANDROID_EABI_TOOLCHAIN)/arm-linux-androideabi-g++ $(ANDROID_CFLAGS)
+endif
+
+ifeq ($(ARCH), arm)
+  # Cross-build for ARM Linux (softfp ABI, NEON FPU). Example make command line:
+  # CROSSTOOL=$HOME/x-tools/arm-unknown-linux-gnueabi/ PATH=$CROSSTOOL/bin:$PATH make ARCH=arm asan_test
+  CLANG_FLAGS= \
+		-ccc-host-triple arm-unknown-linux-gnueabi \
+		-march=armv7-a -mfloat-abi=softfp -mfpu=neon \
+		-ccc-gcc-name arm-unknown-linux-gnueabi-g++ \
+		-B$(CROSSTOOL)/lib/gcc/arm-unknown-linux-gnueabi/4.4.4 \
+		-B$(CROSSTOOL)/arm-unknown-linux-gnueabi/sys-root/usr/lib \
+		-I$(CROSSTOOL)/lib/gcc/arm-unknown-linux-gnueabi/4.4.4/include \
+		-I$(CROSSTOOL)/arm-unknown-linux-gnueabi/include/c++/4.4.4 \
+		-I$(CROSSTOOL)/arm-unknown-linux-gnueabi/include/c++/4.4.4/arm-unknown-linux-gnueabi \
+		-I$(CROSSTOOL)/arm-unknown-linux-gnueabi/sys-root/include \
+		-I$(CROSSTOOL)/arm-unknown-linux-gnueabi/sys-root/usr/include \
+		-L$(CROSSTOOL)/lib/gcc/arm-unknown-linux-gnueabi/4.4.4 \
+		-L$(CROSSTOOL)/arm-unknown-linux-gnueabi/sys-root/lib \
+		-L$(CROSSTOOL)/arm-unknown-linux-gnueabi/sys-root/usr/lib
+  CC=$(CROSSTOOL)/bin/arm-unknown-linux-gnueabi-gcc
+  CXX=$(CROSSTOOL)/bin/arm-unknown-linux-gnueabi-g++
+endif
+# Default to no extra clang flags, but keep any set by the ARCH blocks above.
+CLANG_FLAGS?=
+CLANG_VERSION=3.2
+CLANG_BUILD=$(ROOT)/../../../../build/Release+Asserts
+CLANG_CC=$(CLANG_BUILD)/bin/clang $(CLANG_FLAGS)
+CLANG_CXX=$(CLANG_BUILD)/bin/clang++ $(CLANG_FLAGS)
+FILE_CHECK=$(CLANG_BUILD)/bin/FileCheck
+
+CC=$(CLANG_CC)
+CXX=$(CLANG_CXX)
+
+CFLAGS:=-Wall -fvisibility=hidden
+
+CLEANROOM_CXX=$(CXX) -Wall
+
+INSTALL_DIR=../asan_clang_$(OS)
+BIN=bin_$(OS)
+
+LIBS=#-lpthread -ldl
+ARCH=x86_64
+
+ASAN_STACK=1
+ASAN_GLOBALS=1
+ASAN_SCALE=0  # default will be used
+ASAN_OFFSET=-1  #default will be used
+ASAN_UAR=0
+ASAN_HAS_EXCEPTIONS=1
+ASAN_FLEXIBLE_MAPPING_AND_OFFSET=0
+ASAN_HAS_BLACKLIST=1
+ASAN_NEEDS_SEGV=1
+ASAN_PIE=0
+
+ifeq ($(ARCH), i386)
+BITS=32
+SUFF=$(BITS)
+CFLAGS:=$(CFLAGS) -m$(BITS)
+endif
+
+ifeq ($(ARCH), x86_64)
+BITS=64
+SUFF=$(BITS)
+CFLAGS:=$(CFLAGS) -m$(BITS)
+endif
+
+ifeq ($(ARCH), arm)
+BITS=32
+SUFF=_arm
+CFLAGS:=$(CFLAGS) -march=armv7-a
+ASAN_HAS_EXCEPTIONS=0
+endif
+
+ifeq ($(ARCH), android)
+BITS=32
+SUFF=_android
+CFLAGS:=$(CFLAGS)
+ASAN_HAS_EXCEPTIONS=0
+endif
+
+PIE=
+ifeq ($(ASAN_PIE), 1)
+  PIE=-fPIE -pie
+endif
+
+# This will build libasan on linux for both x86_64 and i386 in the
+# desired location. The Mac library is already built by clang's make.
+# $(CLANG_BUILD)/lib/clang/$(CLANG_VERSION)/lib/$(OS)/libclang_rt.asan-$(ARCH).a
+LIBASAN_INST_DIR=$(CLANG_BUILD)/lib/clang/$(CLANG_VERSION)/lib/$(OS)
+LIBASAN_A=$(LIBASAN_INST_DIR)/libclang_rt.asan-$(ARCH).a
+
+BLACKLIST=
+ifeq ($(ASAN_HAS_BLACKLIST), 1)
+  BLACKLIST=-mllvm -asan-blacklist=$(ROOT)/tests/asan_test.ignore
+endif
+
+COMMON_ASAN_DEFINES=\
+                -DASAN_UAR=$(ASAN_UAR) \
+		-DASAN_HAS_EXCEPTIONS=$(ASAN_HAS_EXCEPTIONS) \
+		-DASAN_NEEDS_SEGV=$(ASAN_NEEDS_SEGV) \
+		-DASAN_HAS_BLACKLIST=$(ASAN_HAS_BLACKLIST)
+
+CLANG_ASAN_CXX=$(CLANG_CXX) \
+	       -faddress-sanitizer \
+		$(BLACKLIST)  \
+		-mllvm -asan-stack=$(ASAN_STACK)      \
+		-mllvm -asan-globals=$(ASAN_GLOBALS)  \
+		-mllvm -asan-mapping-scale=$(ASAN_SCALE) \
+		-mllvm -asan-mapping-offset-log=$(ASAN_OFFSET) \
+		-mllvm -asan-use-after-return=$(ASAN_UAR) \
+		$(COMMON_ASAN_DEFINES)
+
+CLANG_ASAN_LD=$(CLANG_CXX) -faddress-sanitizer
+
+GCC_ASAN_PATH=SET_FROM_COMMAND_LINE
+GCC_ASAN_CXX=$(GCC_ASAN_PATH)/g++ \
+	     -faddress-sanitizer \
+	     $(COMMON_ASAN_DEFINES)
+
+GCC_ASAN_LD=$(GCC_ASAN_PATH)/g++ -ldl -lpthread
+
+ASAN_COMPILER=clang
+
+ifeq ($(ASAN_COMPILER), clang)
+  ASAN_CXX=$(CLANG_ASAN_CXX)
+  ASAN_LD=$(CLANG_ASAN_LD)
+  ASAN_LD_TAIL=
+endif
+
+ifeq ($(ASAN_COMPILER), gcc)
+  ASAN_CXX=$(GCC_ASAN_CXX)
+  ASAN_LD=$(GCC_ASAN_LD)
+  ASAN_LD_TAIL=$(LIBASAN_A)
+endif
+
+RTL_HDR=asan_allocator.h \
+	asan_internal.h \
+	asan_interceptors.h \
+	asan_interface.h \
+	asan_lock.h \
+	asan_mapping.h \
+	asan_procmaps.h \
+	asan_stack.h \
+	asan_stats.h \
+	asan_thread.h \
+	asan_thread_registry.h \
+	interception/interception.h \
+	interception/interception_linux.h \
+	interception/interception_mac.h \
+	interception/mach_override/mach_override.h
+
+LIBASAN_OBJ=$(BIN)/asan_rtl$(SUFF).o \
+	    $(BIN)/asan_allocator$(SUFF).o  \
+	    $(BIN)/asan_globals$(SUFF).o  \
+	    $(BIN)/asan_interceptors$(SUFF).o  \
+	    $(BIN)/asan_linux$(SUFF).o \
+	    $(BIN)/asan_mac$(SUFF).o \
+	    $(BIN)/asan_malloc_linux$(SUFF).o \
+	    $(BIN)/asan_malloc_mac$(SUFF).o \
+	    $(BIN)/asan_new_delete$(SUFF).o \
+	    $(BIN)/asan_poisoning$(SUFF).o  \
+	    $(BIN)/asan_posix$(SUFF).o  \
+	    $(BIN)/asan_printf$(SUFF).o  \
+	    $(BIN)/asan_stack$(SUFF).o  \
+	    $(BIN)/asan_stats$(SUFF).o  \
+	    $(BIN)/asan_thread$(SUFF).o  \
+	    $(BIN)/asan_thread_registry$(SUFF).o  \
+	    $(BIN)/interception/interception_linux$(SUFF).o  \
+	    $(BIN)/interception/interception_mac$(SUFF).o  \
+	    $(BIN)/interception/mach_override/mach_override$(SUFF).o
+
+GTEST_ROOT=third_party/googletest
+GTEST_INCLUDE=-I$(GTEST_ROOT)/include
+GTEST_MAKE_DIR=$(GTEST_ROOT)/make-$(OS)$(SUFF)
+GTEST_LIB=$(GTEST_MAKE_DIR)/gtest-all.o
+
+all: b64 b32
+
+test: t64 t32 output_tests lint
+	@echo "ALL TESTS PASSED"
+
+output_tests: b32 b64
+	cd output_tests && ./test_output.sh $(CLANG_CXX) $(CLANG_CC) $(FILE_CHECK)
+
+t64: b64
+	$(BIN)/asan_test64
+t32: b32
+	$(BIN)/asan_test32
+
+b64: | mk_bin_dir
+	$(MAKE) -f $(MAKEFILE) ARCH=x86_64 asan_test asan_benchmarks
+b32: | mk_bin_dir
+	$(MAKE) -f $(MAKEFILE) ARCH=i386 asan_test asan_benchmarks
+
+lib64:
+	$(MAKE) -f $(MAKEFILE) ARCH=x86_64 lib
+lib32:
+	$(MAKE) -f $(MAKEFILE) ARCH=i386 lib
+
+mk_bin_dir:
+	mkdir -p $(BIN)
+	mkdir -p $(BIN)/interception
+	mkdir -p $(BIN)/interception/mach_override
+
+clang:
+	cd ../ && llvm/rebuild_clang_and_asan.sh > /dev/null
+
+install: install_clang
+
+$(INSTALL_DIR):
+	mkdir -p $(INSTALL_DIR) $(INSTALL_DIR)/bin $(INSTALL_DIR)/lib
+
+install_clang: | $(INSTALL_DIR)
+	cp -v $(CLANG_BUILD)/bin/clang $(INSTALL_DIR)/bin
+	cp -rv $(CLANG_BUILD)/lib/clang $(INSTALL_DIR)/lib
+	(cd $(INSTALL_DIR)/bin; ln -sf clang clang++)
+
+#install_lib: | $(INSTALL_DIR)
+#	cp -v $(CLANG_BUILD)/lib/libasan*.a $(INSTALL_DIR)/lib
+
+$(BIN)/asan_noinst_test$(SUFF).o: tests/asan_noinst_test.cc $(RTL_HDR) $(MAKEFILE)
+	$(CLEANROOM_CXX) $(PIE) $(CFLAGS) $(GTEST_INCLUDE) -I. -g -c $< -O2 -o $@
+
+$(BIN)/asan_break_optimization$(SUFF).o: tests/asan_break_optimization.cc $(MAKEFILE)
+	$(CLEANROOM_CXX) $(PIE) $(CFLAGS) -c $< -O0 -o $@
+
+$(BIN)/%_test$(SUFF).o: tests/%_test.cc $(RTL_HDR) $(MAKEFILE)
+	$(ASAN_CXX) $(GTEST_INCLUDE) -I. -g -c $< -O2 -o $@ $(PIE) $(CFLAGS)
+
+$(BIN)/%_test$(SUFF).o: tests/%_test.mm $(RTL_HDR) $(MAKEFILE)
+	$(ASAN_CXX) $(GTEST_INCLUDE) -I. -g -c $< -O2 -o $@ -ObjC $(PIE) $(CFLAGS)
+
+$(BIN)/%$(SUFF).o: %.cc $(RTL_HDR) $(MAKEFILE)
+	$(CXX) $(PIE) $(CFLAGS) -fPIC -c -O2 -fno-exceptions -funwind-tables \
+		-o $@ -g $< -Ithird_party \
+		-DASAN_NEEDS_SEGV=$(ASAN_NEEDS_SEGV) \
+		-DASAN_HAS_EXCEPTIONS=$(ASAN_HAS_EXCEPTIONS) \
+		-DASAN_FLEXIBLE_MAPPING_AND_OFFSET=$(ASAN_FLEXIBLE_MAPPING_AND_OFFSET) \
+		$(ASAN_FLAGS)
+
+$(BIN)/%$(SUFF).o: %.c $(RTL_HDR) $(MAKEFILE)
+	$(CC) $(PIE) $(CFLAGS) -fPIC -c -O2 -o $@ -g $< -Ithird_party \
+		$(ASAN_FLAGS)
+
+ifeq ($(OS),darwin)
+LD_FLAGS=-framework Foundation
+else
+LD_FLAGS=
+endif
+
+lib: $(LIBASAN_A)
+
+$(LIBASAN_A): mk_bin_dir $(LIBASAN_OBJ) $(MAKEFILE)
+	mkdir -p $(LIBASAN_INST_DIR)
+	ar ru $@ $(LIBASAN_OBJ)
+	$(CXX) -shared $(CFLAGS) $(LIBASAN_OBJ) $(LD_FLAGS) -o $(BIN)/libasan$(SUFF).so
+
+TEST_OBJECTS_COMMON=\
+	     $(BIN)/asan_test$(SUFF).o \
+	     $(BIN)/asan_globals_test$(SUFF).o \
+	     $(BIN)/asan_break_optimization$(SUFF).o \
+	     $(BIN)/asan_noinst_test$(SUFF).o \
+	     $(BIN)/asan_interface_test$(SUFF).o
+
+BENCHMARK_OBJECTS=\
+	     $(BIN)/asan_benchmarks_test$(SUFF).o \
+	     $(BIN)/asan_break_optimization$(SUFF).o
+
+ifeq ($(OS),darwin)
+TEST_OBJECTS=$(TEST_OBJECTS_COMMON) \
+	     $(BIN)/asan_mac_test$(SUFF).o
+else
+TEST_OBJECTS=$(TEST_OBJECTS_COMMON)
+endif
+
+$(BIN)/asan_test$(SUFF): $(TEST_OBJECTS) $(LIBASAN_A) $(MAKEFILE) tests/asan_test.ignore $(GTEST_LIB)
+	$(ASAN_LD) $(PIE) $(CFLAGS) -g -O3 $(TEST_OBJECTS) \
+		$(LD_FLAGS) -o $@ $(LIBS) $(GTEST_LIB) $(ASAN_LD_TAIL)
+
+$(BIN)/asan_benchmarks$(SUFF): $(BENCHMARK_OBJECTS) $(LIBASAN_A) $(MAKEFILE) $(GTEST_LIB)
+	$(ASAN_LD) $(PIE) $(CFLAGS) -g -O3 $(BENCHMARK_OBJECTS) \
+		$(LD_FLAGS) -o $@ $(LIBS) $(GTEST_LIB) $(ASAN_LD_TAIL)
+
+asan_test: $(BIN)/asan_test$(SUFF)
+
+asan_benchmarks: $(BIN)/asan_benchmarks$(SUFF)
+
+# for now, build gtest with clang/asan even if we use a different compiler.
+$(GTEST_LIB):
+	mkdir -p $(GTEST_MAKE_DIR) && \
+	cd $(GTEST_MAKE_DIR) && \
+	$(MAKE) -f ../make/Makefile CXXFLAGS="$(PIE) $(CFLAGS) -g -w" \
+	  CXX="$(CLANG_CXX)"
+
+RTL_LINT_FITLER=-readability/casting,-readability/check,-build/include,-build/header_guard,-build/class,-legal/copyright
+# TODO(kcc): remove these filters one by one
+TEST_LINT_FITLER=-readability/casting,-build/include,-legal/copyright,-whitespace/newline,-runtime/sizeof,-runtime/int,-runtime/printf
+
+LLVM_LINT_FILTER=-,+whitespace
+
+ADDRESS_SANITIZER_CPP=../../../../lib/Transforms/Instrumentation/AddressSanitizer.cpp
+
+lint:
+	third_party/cpplint/cpplint.py --filter=$(LLVM_LINT_FILTER) $(ADDRESS_SANITIZER_CPP)
+	third_party/cpplint/cpplint.py --filter=$(RTL_LINT_FITLER) asan_*.cc asan_*.h
+	third_party/cpplint/cpplint.py --filter=$(RTL_LINT_FITLER) interception/interception*.h interception/interception*.cc
+	third_party/cpplint/cpplint.py --filter=$(TEST_LINT_FITLER) tests/*.cc output_tests/*.cc
+
+get_third_party:
+	rm -rf third_party
+	mkdir third_party
+	(cd third_party && \
+	svn co -r375        http://googletest.googlecode.com/svn/trunk googletest && \
+	svn co -r69 http://google-styleguide.googlecode.com/svn/trunk/cpplint cpplint \
+	)
+
+clean:
+	rm -f *.o *.ll *.S *.a *.log asan_test64* asan_test32*  a.out perf.data log
+	rm -rf $(BIN)
+	rm -rf $(GTEST_ROOT)/make-*
diff --git a/lib/asan/README.txt b/lib/asan/README.txt
new file mode 100644
index 0000000..5e66004
--- /dev/null
+++ b/lib/asan/README.txt
@@ -0,0 +1,25 @@
+AddressSanitizer RT
+================================
+This directory contains sources of the AddressSanitizer (asan) run-time library.
+We are in the process of integrating AddressSanitizer with LLVM, stay tuned.
+
+Directory structure:
+
+README.txt       : This file.
+Makefile.mk      : Currently a stub for a proper makefile. Not usable.
+Makefile.old     : Old out-of-tree makefile, the only usable one so far.
+asan_*.{cc,h}    : Sources of the asan run-time library.
+mach_override/*  : Utility to override functions on Darwin (MIT License).
+scripts/*        : Helper scripts.
+
+Temporary build instructions (verified on linux):
+
+cd lib/asan
+make -f Makefile.old get_third_party  # gets googletest and cpplint
+make -f Makefile.old test -j 8 CLANG_BUILD=/path/to/Release+Asserts
+# Optional:
+# make -f Makefile.old install # installs clang and rt to lib/asan_clang_linux
+
+For more info see http://code.google.com/p/address-sanitizer/
+
+
diff --git a/lib/asan/asan_allocator.cc b/lib/asan/asan_allocator.cc
new file mode 100644
index 0000000..59d6626
--- /dev/null
+++ b/lib/asan/asan_allocator.cc
@@ -0,0 +1,1082 @@
+//===-- asan_allocator.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Implementation of ASan's memory allocator.
+// Every piece of memory (AsanChunk) allocated by the allocator
+// has a left redzone of REDZONE bytes and
+// a right redzone such that the end of the chunk is aligned by REDZONE
+// (i.e. the right redzone is between 0 and REDZONE-1).
+// The left redzone is always poisoned.
+// The right redzone is poisoned on malloc, the body is poisoned on free.
+// Once freed, a chunk is moved to a quarantine (fifo list).
+// After quarantine, a chunk is returned to freelists.
+//
+// The left redzone contains ASan's internal data and the stack trace of
+// the malloc call.
+// Once freed, the body of the chunk contains the stack trace of the free call.
+//
+//===----------------------------------------------------------------------===//
+
+#include "asan_allocator.h"
+#include "asan_interceptors.h"
+#include "asan_interface.h"
+#include "asan_internal.h"
+#include "asan_lock.h"
+#include "asan_mapping.h"
+#include "asan_stats.h"
+#include "asan_thread.h"
+#include "asan_thread_registry.h"
+
+#ifdef _WIN32
+#include <intrin.h>
+#endif
+
+namespace __asan {
+
+#define  REDZONE FLAG_redzone
+static const size_t kMinAllocSize = REDZONE * 2;
+static const uint64_t kMaxAvailableRam = 128ULL << 30;  // 128G
+static const size_t kMaxThreadLocalQuarantine = 1 << 20;  // 1M
+
+static const size_t kMinMmapSize = (ASAN_LOW_MEMORY) ? 4UL << 17 : 4UL << 20;
+static const size_t kMaxSizeForThreadLocalFreeList =
+    (ASAN_LOW_MEMORY) ? 1 << 15 : 1 << 17;
+
+// Size classes less than kMallocSizeClassStep are powers of two.
+// All other size classes are multiples of kMallocSizeClassStep.
+static const size_t kMallocSizeClassStepLog = 26;
+static const size_t kMallocSizeClassStep = 1UL << kMallocSizeClassStepLog;
+
+static const size_t kMaxAllowedMallocSize =
+    (__WORDSIZE == 32) ? 3UL << 30 : 8UL << 30;
+
+static inline bool IsAligned(uintptr_t a, uintptr_t alignment) {
+  return (a & (alignment - 1)) == 0;
+}
+
+static inline size_t Log2(size_t x) {
+  CHECK(IsPowerOfTwo(x));
+#if defined(_WIN64)
+  unsigned long ret;  // NOLINT
+  _BitScanForward64(&ret, x);
+  return ret;
+#elif defined(_WIN32)
+  unsigned long ret;  // NOLINT
+  _BitScanForward(&ret, x);
+  return ret;
+#else
+  return __builtin_ctzl(x);
+#endif
+}
+
+static inline size_t RoundUpToPowerOfTwo(size_t size) {
+  CHECK(size);
+  if (IsPowerOfTwo(size)) return size;
+  // 'up' receives the index of the highest set bit of 'size'.
+  unsigned long up;  // NOLINT
+#if defined(_WIN64)
+  _BitScanReverse64(&up, size);
+#elif defined(_WIN32)
+  _BitScanReverse(&up, size);
+#else
+  up = __WORDSIZE - 1 - __builtin_clzl(size);
+#endif
+  CHECK(size < (1ULL << (up + 1)));
+  CHECK(size > (1ULL << up));
+  return (size_t)1 << (up + 1);  // 1UL is only 32 bits on Win64.
+}
+
+static inline size_t SizeClassToSize(uint8_t size_class) {
+  CHECK(size_class < kNumberOfSizeClasses);
+  if (size_class <= kMallocSizeClassStepLog) {
+    return 1UL << size_class;
+  } else {
+    return (size_class - kMallocSizeClassStepLog) * kMallocSizeClassStep;
+  }
+}
+
+static inline uint8_t SizeToSizeClass(size_t size) {
+  uint8_t res = 0;
+  if (size <= kMallocSizeClassStep) {
+    size_t rounded = RoundUpToPowerOfTwo(size);
+    res = Log2(rounded);
+  } else {
+    res = ((size + kMallocSizeClassStep - 1) / kMallocSizeClassStep)
+        + kMallocSizeClassStepLog;
+  }
+  CHECK(res < kNumberOfSizeClasses);
+  CHECK(size <= SizeClassToSize(res));
+  return res;
+}
+
+// Given REDZONE bytes, we need to mark first size bytes
+// as addressable and the rest REDZONE-size bytes as unaddressable.
+static void PoisonHeapPartialRightRedzone(uintptr_t mem, size_t size) {
+  CHECK(size <= REDZONE);
+  CHECK(IsAligned(mem, REDZONE));
+  CHECK(IsPowerOfTwo(SHADOW_GRANULARITY));
+  CHECK(IsPowerOfTwo(REDZONE));
+  CHECK(REDZONE >= SHADOW_GRANULARITY);
+  PoisonShadowPartialRightRedzone(mem, size, REDZONE,
+                                  kAsanHeapRightRedzoneMagic);
+}
+
+static uint8_t *MmapNewPagesAndPoisonShadow(size_t size) {
+  CHECK(IsAligned(size, kPageSize));
+  uint8_t *res = (uint8_t*)AsanMmapSomewhereOrDie(size, __FUNCTION__);
+  PoisonShadow((uintptr_t)res, size, kAsanHeapLeftRedzoneMagic);
+  if (FLAG_debug) {
+    Printf("ASAN_MMAP: [%p, %p)\n", res, res + size);
+  }
+  return res;
+}
+
+// Every chunk of memory allocated by this allocator can be in one of 3 states:
+// CHUNK_AVAILABLE: the chunk is in the free list and ready to be allocated.
+// CHUNK_ALLOCATED: the chunk is allocated and not yet freed.
+// CHUNK_QUARANTINE: the chunk was freed and put into quarantine zone.
+//
+// The pseudo state CHUNK_MEMALIGN is used to mark that the address is not
+// the beginning of an AsanChunk (in which case 'next' contains the address
+// of the AsanChunk).
+//
+// The magic numbers for the enum values are taken randomly.
+enum {
+  CHUNK_AVAILABLE  = 0x573B,
+  CHUNK_ALLOCATED  = 0x3204,
+  CHUNK_QUARANTINE = 0x1978,
+  CHUNK_MEMALIGN   = 0xDC68,
+};
+
+struct ChunkBase {
+  uint16_t   chunk_state;
+  uint8_t    size_class;
+  uint32_t   offset;  // User-visible memory starts at this+offset (beg()).
+  int32_t    alloc_tid;
+  int32_t    free_tid;
+  size_t     used_size;  // Size requested by the user.
+  AsanChunk *next;
+
+  uintptr_t   beg() { return (uintptr_t)this + offset; }
+  size_t Size() { return SizeClassToSize(size_class); }
+  uint8_t SizeClass() { return size_class; }
+};
+
+struct AsanChunk: public ChunkBase {
+  uint32_t *compressed_alloc_stack() {  // Stored right after the header.
+    CHECK(REDZONE >= sizeof(ChunkBase));
+    return (uint32_t*)((uintptr_t)this + sizeof(ChunkBase));
+  }
+  uint32_t *compressed_free_stack() {  // Stored at this + REDZONE.
+    CHECK(REDZONE >= sizeof(ChunkBase));
+    return (uint32_t*)((uintptr_t)this + REDZONE);
+  }
+
+  // The left redzone after the ChunkBase is given to the alloc stack trace.
+  size_t compressed_alloc_stack_size() {
+    return (REDZONE - sizeof(ChunkBase)) / sizeof(uint32_t);
+  }
+  size_t compressed_free_stack_size() {  // One REDZONE of uint32_t slots.
+    return (REDZONE) / sizeof(uint32_t);
+  }
+  // In-bounds test; on success *offset is the distance of addr from beg().
+  bool AddrIsInside(uintptr_t addr, size_t access_size, size_t *offset) {
+    if (addr >= beg() && (addr + access_size) <= (beg() + used_size)) {
+      *offset = addr - beg();
+      return true;
+    }
+    return false;
+  }
+  // True iff addr is below beg(); *offset is then the distance up to beg().
+  bool AddrIsAtLeft(uintptr_t addr, size_t access_size, size_t *offset) {
+    if (addr < beg()) {
+      *offset = beg() - addr;
+      return true;
+    }
+    return false;
+  }
+  // True iff the access reaches the region end; *offset: bytes addr is past it.
+  bool AddrIsAtRight(uintptr_t addr, size_t access_size, size_t *offset) {
+    if (addr + access_size >= beg() + used_size) {
+      if (addr <= beg() + used_size)
+        *offset = 0;
+      else
+        *offset = addr - (beg() + used_size);
+      return true;
+    }
+    return false;
+  }
+  // Prints where addr lies relative to this chunk's user region.
+  void DescribeAddress(uintptr_t addr, size_t access_size) {
+    size_t offset;
+    Printf("%p is located ", addr);
+    if (AddrIsInside(addr, access_size, &offset)) {
+      Printf("%zu bytes inside of", offset);
+    } else if (AddrIsAtLeft(addr, access_size, &offset)) {
+      Printf("%zu bytes to the left of", offset);
+    } else if (AddrIsAtRight(addr, access_size, &offset)) {
+      Printf("%zu bytes to the right of", offset);
+    } else {
+      Printf(" somewhere around (this is AddressSanitizer bug!)");
+    }
+    Printf(" %zu-byte region [%p,%p)\n",
+           used_size, beg(), beg() + used_size);
+  }
+};
+
+// Maps a user pointer to its chunk header, found REDZONE bytes below it.
+static AsanChunk *PtrToChunk(uintptr_t ptr) {
+  AsanChunk *header = (AsanChunk*)(ptr - REDZONE);
+  // A CHUNK_MEMALIGN stub only forwards to the real chunk through 'next'.
+  if (header->chunk_state != CHUNK_MEMALIGN) return header;
+  return header->next;
+}
+
+
+void AsanChunkFifoList::PushList(AsanChunkFifoList *q) {
+  CHECK(q->size() > 0);  // Callers must not hand over an empty list.
+  if (!last_) {  // This list is empty: adopt both ends of 'q'.
+    CHECK(!first_);
+    first_ = q->first_;
+    last_ = q->last_;
+    CHECK(first_);
+  } else {  // Otherwise splice 'q' behind the current tail.
+    CHECK(first_);
+    CHECK(!last_->next);
+    last_->next = q->first_;
+    last_ = q->last_;
+  }
+  CHECK(last_);
+  CHECK(!last_->next);
+  size_ += q->size();
+  q->clear();  // 'q' no longer refers to any of the chunks.
+}
+
+void AsanChunkFifoList::Push(AsanChunk *n) {
+  CHECK(n->next == NULL);  // 'n' must be detached; it becomes the new tail.
+  if (!last_) {  // Empty list: 'n' is both head and tail.
+    CHECK(!first_);
+    first_ = n;
+  } else {
+    CHECK(first_);
+    CHECK(!last_->next);
+    last_->next = n;
+  }
+  last_ = n;
+  size_ += n->Size();  // size_ counts bytes (chunk sizes), not elements.
+}
+
+// Interesting performance observation: this function takes up to 15% of overall
+// allocator time. That's because *first_ has been evicted from cache long time
+// ago. Not sure if we can or want to do anything with this.
+AsanChunk *AsanChunkFifoList::Pop() {
+  CHECK(first_);  // Popping from an empty list is a bug.
+  AsanChunk *head = first_;
+  first_ = head->next;
+  if (!first_) last_ = NULL;  // The list just became empty.
+  const size_t head_size = head->Size();
+  CHECK(size_ >= head_size);
+  size_ -= head_size;
+  if (last_) {
+    CHECK(!last_->next);
+  }
+  return head;  // Note: head->next is left untouched.
+}
+
+// All pages we ever allocated.
+struct PageGroup {
+  uintptr_t beg;  // First byte of the mapping (address of the first chunk).
+  uintptr_t end;  // One past the last byte of the mapping.
+  size_t size_of_chunk;  // Size of every chunk in this group.
+  uintptr_t last_chunk;  // Address of the last usable chunk.
+  bool InRange(uintptr_t addr) {
+    return addr >= beg && addr < end;
+  }
+};
+
+class MallocInfo {
+ public:
+
+  explicit MallocInfo(LinkerInitialized x) : mu_(x) { }
+
+  // Detaches 'n_chunks' chunks of 'size_class' from the global free list and
+  // returns them linked through 'next'. Holds mu_ for the whole operation.
+  AsanChunk *AllocateChunks(uint8_t size_class, size_t n_chunks) {
+    AsanChunk *taken = NULL;
+    ScopedLock lock(&mu_);
+    AsanChunk *&head = free_lists_[size_class];
+    for (size_t left = n_chunks; left > 0; left--) {
+      // Refill the global list from a fresh mapping once it runs dry.
+      if (!head) head = GetNewChunks(size_class);
+      AsanChunk *chunk = head;
+      head = chunk->next;
+      // Prepend to the result chain.
+      chunk->next = taken;
+      CHECK(chunk->chunk_state == CHUNK_AVAILABLE);
+      taken = chunk;
+    }
+    return taken;
+  }
+
+  void SwallowThreadLocalMallocStorage(AsanThreadLocalMallocStorage *x,
+                                       bool eat_free_lists) {
+    CHECK(FLAG_quarantine_size > 0);
+    ScopedLock lock(&mu_);  // Guards quarantine_ and free_lists_ below.
+    AsanChunkFifoList *q = &x->quarantine_;
+    if (q->size() > 0) {
+      quarantine_.PushList(q);  // Moves every chunk over and empties q.
+      while (quarantine_.size() > FLAG_quarantine_size) {
+        QuarantinePop();  // Oldest chunk goes back to free_lists_.
+      }
+    }
+    if (eat_free_lists) {  // Also take back the thread's cached free chunks.
+      for (size_t size_class = 0; size_class < kNumberOfSizeClasses;
+           size_class++) {
+        AsanChunk *m = x->free_lists_[size_class];
+        while (m) {
+          AsanChunk *t = m->next;
+          m->next = free_lists_[size_class];
+          free_lists_[size_class] = m;
+          m = t;
+        }
+        x->free_lists_[size_class] = 0;
+      }
+    }
+  }
+
+  void BypassThreadLocalQuarantine(AsanChunk *chunk) {
+    ScopedLock lock(&mu_);
+    quarantine_.Push(chunk);  // No size trimming happens on this path.
+  }
+
+  AsanChunk *FindMallocedOrFreed(uintptr_t addr, size_t access_size) {
+    ScopedLock lock(&mu_);
+    return FindChunkByAddr(addr);  // access_size is currently unused.
+  }
+
+  // Returns the requested size of the live allocation starting at 'ptr'.
+  // Yields 0 for NULL, for memory outside every page group, and for chunks
+  // that are not in the CHUNK_ALLOCATED state.
+  size_t AllocationSize(uintptr_t ptr) {
+    if (!ptr) return 0;
+    ScopedLock lock(&mu_);
+
+    // Touch the chunk header only after confirming the memory is ours.
+    if (!FindPageGroupUnlocked(ptr)) return 0;
+    AsanChunk *chunk = PtrToChunk(ptr);
+    // Chunks in any other state report no size.
+    if (chunk->chunk_state != CHUNK_ALLOCATED) return 0;
+    return chunk->used_size;
+  }
+
+  void ForceLock() {  // Acquires mu_ and leaves it held on return.
+    mu_.Lock();
+  }
+
+  void ForceUnlock() {  // Releases mu_; the counterpart of ForceLock().
+    mu_.Unlock();
+  }
+
+  void PrintStatus() {
+    ScopedLock lock(&mu_);
+    size_t malloced = 0;  // NOTE(review): never updated, so always prints 0.
+    // All byte counts below are shifted right by 20 before printing.
+    Printf(" MallocInfo: in quarantine: %zu malloced: %zu; ",
+           quarantine_.size() >> 20, malloced >> 20);
+    for (size_t j = 1; j < kNumberOfSizeClasses; j++) {  // Class 0 is skipped.
+      AsanChunk *i = free_lists_[j];
+      if (!i) continue;
+      size_t t = 0;
+      for (; i; i = i->next) {
+        t += i->Size();
+      }
+      Printf("%zu:%zu ", j, t >> 20);
+    }
+    Printf("\n");
+  }
+
+  PageGroup *FindPageGroup(uintptr_t addr) {  // Locking wrapper.
+    ScopedLock lock(&mu_);
+    return FindPageGroupUnlocked(addr);
+  }
+
+ private:
+  PageGroup *FindPageGroupUnlocked(uintptr_t addr) {  // Callers hold mu_.
+    int n = n_page_groups_;
+    // If the page groups are not sorted yet, sort them.
+    if (n_sorted_page_groups_ < n) {
+      SortArray((uintptr_t*)page_groups_, n);  // Sorts the pointer values.
+      n_sorted_page_groups_ = n;
+    }
+    // Binary search over the page groups.
+    int beg = 0, end = n;
+    while (beg < end) {
+      int med = (beg + end) / 2;
+      uintptr_t g = (uintptr_t)page_groups_[med];
+      if (addr > g) {
+        // 'g' points to the end of the group, so 'addr'
+        // may not belong to page_groups_[med] or any previous group.
+        beg = med + 1;
+      } else {
+        // 'addr' may belong to page_groups_[med] or a previous group.
+        end = med;
+      }
+    }
+    if (beg >= n)
+      return NULL;
+    PageGroup *g = page_groups_[beg];
+    CHECK(g);
+    if (g->InRange(addr))  // Final confirmation against [beg, end).
+      return g;
+    return NULL;
+  }
+
+  // We have an address between two chunks, and we want to report just one.
+  AsanChunk *ChooseChunk(uintptr_t addr,
+                         AsanChunk *left_chunk, AsanChunk *right_chunk) {
+    const bool left_unused = left_chunk->chunk_state == CHUNK_AVAILABLE;
+    const bool right_unused = right_chunk->chunk_state == CHUNK_AVAILABLE;
+    // When exactly one neighbour is in use (allocated or quarantined), it is
+    // the more informative one to report.
+    if (left_unused != right_unused)
+      return left_unused ? right_chunk : left_chunk;
+    // Otherwise report the neighbour whose user region is closer to 'addr';
+    // ties go to the right chunk.
+    size_t dist_to_left = 0, dist_to_right = 0;
+    CHECK(left_chunk->AddrIsAtRight(addr, 1, &dist_to_left));
+    CHECK(right_chunk->AddrIsAtLeft(addr, 1, &dist_to_right));
+    if (dist_to_left < dist_to_right)
+      return left_chunk;
+    return right_chunk;
+  }
+
+  AsanChunk *FindChunkByAddr(uintptr_t addr) {
+    PageGroup *g = FindPageGroupUnlocked(addr);
+    if (!g) return 0;  // addr is not inside any page group.
+    CHECK(g->size_of_chunk);
+    uintptr_t offset_from_beg = addr - g->beg;
+    uintptr_t this_chunk_addr = g->beg +
+        (offset_from_beg / g->size_of_chunk) * g->size_of_chunk;
+    CHECK(g->InRange(this_chunk_addr));
+    AsanChunk *m = (AsanChunk*)this_chunk_addr;
+    CHECK(m->chunk_state == CHUNK_ALLOCATED ||
+          m->chunk_state == CHUNK_AVAILABLE ||
+          m->chunk_state == CHUNK_QUARANTINE);
+    size_t offset = 0;
+    if (m->AddrIsInside(addr, 1, &offset))
+      return m;
+    // addr is outside m's user region: weigh m against its neighbour.
+    if (m->AddrIsAtRight(addr, 1, &offset)) {
+      if (this_chunk_addr == g->last_chunk)  // rightmost chunk
+        return m;
+      uintptr_t right_chunk_addr = this_chunk_addr + g->size_of_chunk;
+      CHECK(g->InRange(right_chunk_addr));
+      return ChooseChunk(addr, m, (AsanChunk*)right_chunk_addr);
+    } else {
+      CHECK(m->AddrIsAtLeft(addr, 1, &offset));
+      if (this_chunk_addr == g->beg)  // leftmost chunk
+        return m;
+      uintptr_t left_chunk_addr = this_chunk_addr - g->size_of_chunk;
+      CHECK(g->InRange(left_chunk_addr));
+      return ChooseChunk(addr, (AsanChunk*)left_chunk_addr, m);
+    }
+  }
+
+  void QuarantinePop() {
+    CHECK(quarantine_.size() > 0);
+    AsanChunk *m = quarantine_.Pop();
+    CHECK(m);
+    // if (F_v >= 2) Printf("MallocInfo::pop %p\n", m);
+    // Flip the chunk back to AVAILABLE and poison all of it.
+    CHECK(m->chunk_state == CHUNK_QUARANTINE);
+    m->chunk_state = CHUNK_AVAILABLE;
+    PoisonShadow((uintptr_t)m, m->Size(), kAsanHeapLeftRedzoneMagic);
+    CHECK(m->alloc_tid >= 0);
+    CHECK(m->free_tid >= 0);
+    // Push the chunk onto the global free list of its size class.
+    size_t size_class = m->SizeClass();
+    m->next = free_lists_[size_class];
+    free_lists_[size_class] = m;
+
+    // Statistics.
+    AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+    thread_stats.real_frees++;
+    thread_stats.really_freed += m->used_size;
+    thread_stats.really_freed_redzones += m->Size() - m->used_size;
+    thread_stats.really_freed_by_size[m->SizeClass()]++;
+  }
+
+  // Get a list of newly allocated chunks.
+  AsanChunk *GetNewChunks(uint8_t size_class) {
+    size_t size = SizeClassToSize(size_class);
+    CHECK(IsPowerOfTwo(kMinMmapSize));
+    CHECK(size < kMinMmapSize || (size % kMinMmapSize) == 0);
+    size_t mmap_size = Max(size, kMinMmapSize);
+    size_t n_chunks = mmap_size / size;
+    CHECK(n_chunks * size == mmap_size);
+    if (size < kPageSize) {
+      // Size is small, just poison the last chunk.
+      n_chunks--;
+    } else {
+      // Size is large, allocate an extra page at right and poison it.
+      mmap_size += kPageSize;
+    }
+    CHECK(n_chunks > 0);
+    uint8_t *mem = MmapNewPagesAndPoisonShadow(mmap_size);
+
+    // Statistics.
+    AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+    thread_stats.mmaps++;
+    thread_stats.mmaped += mmap_size;
+    thread_stats.mmaped_by_size[size_class] += n_chunks;
+    // Carve the mapping into chunks, chaining them in reverse address order.
+    AsanChunk *res = NULL;
+    for (size_t i = 0; i < n_chunks; i++) {
+      AsanChunk *m = (AsanChunk*)(mem + i * size);
+      m->chunk_state = CHUNK_AVAILABLE;
+      m->size_class = size_class;
+      m->next = res;
+      res = m;
+    }
+    PageGroup *pg = (PageGroup*)(mem + n_chunks * size);
+    // This memory is already poisoned, no need to poison it again.
+    pg->beg = (uintptr_t)mem;
+    pg->end = pg->beg + mmap_size;
+    pg->size_of_chunk = size;
+    pg->last_chunk = (uintptr_t)(mem + size * (n_chunks - 1));
+    int page_group_idx = AtomicInc(&n_page_groups_) - 1;
+    CHECK(page_group_idx < (int)ASAN_ARRAY_SIZE(page_groups_));
+    page_groups_[page_group_idx] = pg;  // Appended unsorted; sorted lazily.
+    return res;
+  }
+
+  AsanChunk *free_lists_[kNumberOfSizeClasses];
+  AsanChunkFifoList quarantine_;
+  AsanLock mu_;
+
+  PageGroup *page_groups_[kMaxAvailableRam / kMinMmapSize];
+  int n_page_groups_;  // atomic
+  int n_sorted_page_groups_;
+};
+
+static MallocInfo malloc_info(LINKER_INITIALIZED);
+
+void AsanThreadLocalMallocStorage::CommitBack() {
+  malloc_info.SwallowThreadLocalMallocStorage(this, true);  // Free lists too.
+}
+
+static void Describe(uintptr_t addr, size_t access_size) {
+  AsanChunk *m = malloc_info.FindMallocedOrFreed(addr, access_size);
+  if (!m) return;  // addr is not inside any heap page group.
+  m->DescribeAddress(addr, access_size);
+  CHECK(m->alloc_tid >= 0);
+  AsanThreadSummary *alloc_thread =
+      asanThreadRegistry().FindByTid(m->alloc_tid);
+  AsanStackTrace alloc_stack;
+  AsanStackTrace::UncompressStack(&alloc_stack, m->compressed_alloc_stack(),
+                                  m->compressed_alloc_stack_size());
+  AsanThread *t = asanThreadRegistry().GetCurrent();
+  CHECK(t);  // NOTE(review): GetCurrent() can be NULL elsewhere (see Allocate).
+  if (m->free_tid >= 0) {  // Chunk was freed (assumes kInvalidTid < 0).
+    AsanThreadSummary *free_thread =
+        asanThreadRegistry().FindByTid(m->free_tid);
+    Printf("freed by thread T%d here:\n", free_thread->tid());
+    AsanStackTrace free_stack;
+    AsanStackTrace::UncompressStack(&free_stack, m->compressed_free_stack(),
+                                    m->compressed_free_stack_size());
+    free_stack.PrintStack();
+    Printf("previously allocated by thread T%d here:\n",
+           alloc_thread->tid());
+
+    alloc_stack.PrintStack();
+    t->summary()->Announce();
+    free_thread->Announce();
+    alloc_thread->Announce();
+  } else {
+    Printf("allocated by thread T%d here:\n", alloc_thread->tid());
+    alloc_stack.PrintStack();
+    t->summary()->Announce();
+    alloc_thread->Announce();
+  }
+}
+
+static uint8_t *Allocate(size_t alignment, size_t size, AsanStackTrace *stack) {
+  __asan_init();
+  CHECK(stack);
+  if (size == 0) {
+    size = 1;  // TODO(kcc): do something smarter
+  }
+  CHECK(IsPowerOfTwo(alignment));
+  size_t rounded_size = RoundUpTo(size, REDZONE);
+  size_t needed_size = rounded_size + REDZONE;  // User bytes + left redzone.
+  if (alignment > REDZONE) {
+    needed_size += alignment;  // Slack for moving addr up to the alignment.
+  }
+  CHECK(IsAligned(needed_size, REDZONE));
+  if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize) {
+    Report("WARNING: AddressSanitizer failed to allocate %p bytes\n", size);
+    return 0;  // Oversized requests fail with NULL instead of aborting.
+  }
+
+  uint8_t size_class = SizeToSizeClass(needed_size);
+  size_t size_to_allocate = SizeClassToSize(size_class);
+  CHECK(size_to_allocate >= kMinAllocSize);
+  CHECK(size_to_allocate >= needed_size);
+  CHECK(IsAligned(size_to_allocate, REDZONE));
+
+  if (FLAG_v >= 3) {
+    Printf("Allocate align: %zu size: %zu class: %u real: %zu\n",
+         alignment, size, size_class, size_to_allocate);
+  }
+
+  AsanThread *t = asanThreadRegistry().GetCurrent();
+  AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+  // Statistics
+  thread_stats.mallocs++;
+  thread_stats.malloced += size;
+  thread_stats.malloced_redzones += size_to_allocate - size;
+  thread_stats.malloced_by_size[size_class]++;
+
+  AsanChunk *m = NULL;
+  if (!t || size_to_allocate >= kMaxSizeForThreadLocalFreeList) {
+    // get directly from global storage.
+    m = malloc_info.AllocateChunks(size_class, 1);
+    thread_stats.malloc_large++;
+  } else {
+    // get from the thread-local storage.
+    AsanChunk **fl = &t->malloc_storage().free_lists_[size_class];
+    if (!*fl) {
+      size_t n_new_chunks = kMaxSizeForThreadLocalFreeList / size_to_allocate;
+      *fl = malloc_info.AllocateChunks(size_class, n_new_chunks);
+      thread_stats.malloc_small_slow++;
+    }
+    m = *fl;
+    *fl = (*fl)->next;
+  }
+  CHECK(m);
+  CHECK(m->chunk_state == CHUNK_AVAILABLE);
+  m->chunk_state = CHUNK_ALLOCATED;
+  m->next = NULL;
+  CHECK(m->Size() == size_to_allocate);
+  uintptr_t addr = (uintptr_t)m + REDZONE;
+  CHECK(addr == (uintptr_t)m->compressed_free_stack());
+  // Over-aligned: move addr up and leave a CHUNK_MEMALIGN stub below it.
+  if (alignment > REDZONE && (addr & (alignment - 1))) {
+    addr = RoundUpTo(addr, alignment);
+    CHECK((addr & (alignment - 1)) == 0);
+    AsanChunk *p = (AsanChunk*)(addr - REDZONE);
+    p->chunk_state = CHUNK_MEMALIGN;
+    p->next = m;  // PtrToChunk() follows this link back to the real chunk.
+  }
+  CHECK(m == PtrToChunk(addr));
+  m->used_size = size;
+  m->offset = addr - (uintptr_t)m;
+  CHECK(m->beg() == addr);
+  m->alloc_tid = t ? t->tid() : 0;
+  m->free_tid   = AsanThread::kInvalidTid;
+  AsanStackTrace::CompressStack(stack, m->compressed_alloc_stack(),
+                                m->compressed_alloc_stack_size());
+  PoisonShadow(addr, rounded_size, 0);  // 0: presumably "addressable".
+  if (size < rounded_size) {
+    PoisonHeapPartialRightRedzone(addr + rounded_size - REDZONE,
+                                  size & (REDZONE - 1));
+  }
+  if (size <= FLAG_max_malloc_fill_size) {
+    REAL(memset)((void*)addr, 0, rounded_size);  // Zero-fill small blocks.
+  }
+  return (uint8_t*)addr;
+}
+
+static void Deallocate(uint8_t *ptr, AsanStackTrace *stack) {
+  if (!ptr) return;  // Freeing NULL is a no-op.
+  CHECK(stack);
+
+  if (FLAG_debug) {
+    CHECK(malloc_info.FindPageGroup((uintptr_t)ptr));
+  }
+
+  // Printf("Deallocate %p\n", ptr);
+  AsanChunk *m = PtrToChunk((uintptr_t)ptr);
+
+  // Flip the state atomically to avoid race on double-free.
+  uint16_t old_chunk_state = AtomicExchange(&m->chunk_state, CHUNK_QUARANTINE);
+
+  if (old_chunk_state == CHUNK_QUARANTINE) {
+    Report("ERROR: AddressSanitizer attempting double-free on %p:\n", ptr);
+    stack->PrintStack();
+    Describe((uintptr_t)ptr, 1);
+    ShowStatsAndAbort();
+  } else if (old_chunk_state != CHUNK_ALLOCATED) {
+    Report("ERROR: AddressSanitizer attempting free on address which was not"
+           " malloc()-ed: %p\n", ptr);
+    stack->PrintStack();
+    ShowStatsAndAbort();
+  }
+  CHECK(old_chunk_state == CHUNK_ALLOCATED);
+  CHECK(m->free_tid == AsanThread::kInvalidTid);
+  CHECK(m->alloc_tid >= 0);
+  AsanThread *t = asanThreadRegistry().GetCurrent();
+  m->free_tid = t ? t->tid() : 0;
+  AsanStackTrace::CompressStack(stack, m->compressed_free_stack(),
+                                m->compressed_free_stack_size());
+  size_t rounded_size = RoundUpTo(m->used_size, REDZONE);
+  PoisonShadow((uintptr_t)ptr, rounded_size, kAsanHeapFreeMagic);
+
+  // Statistics.
+  AsanStats &thread_stats = asanThreadRegistry().GetCurrentThreadStats();
+  thread_stats.frees++;
+  thread_stats.freed += m->used_size;
+  thread_stats.freed_by_size[m->SizeClass()]++;
+  // Park the chunk in a quarantine list; it is not put on a free list here.
+  CHECK(m->chunk_state == CHUNK_QUARANTINE);
+  if (t) {
+    AsanThreadLocalMallocStorage *ms = &t->malloc_storage();
+    CHECK(!m->next);
+    ms->quarantine_.Push(m);
+
+    if (ms->quarantine_.size() > kMaxThreadLocalQuarantine) {
+      malloc_info.SwallowThreadLocalMallocStorage(ms, false);
+    }
+  } else {
+    CHECK(!m->next);
+    malloc_info.BypassThreadLocalQuarantine(m);  // No thread-local storage.
+  }
+}
+
+static uint8_t *Reallocate(uint8_t *old_ptr, size_t new_size,
+                           AsanStackTrace *stack) {
+  CHECK(old_ptr && new_size);  // asan_realloc() handles NULL and size 0.
+
+  AsanStats &stats = asanThreadRegistry().GetCurrentThreadStats();
+  stats.reallocs++;
+  stats.realloced += new_size;
+
+  // Only a live allocation may be resized.
+  AsanChunk *old_chunk = PtrToChunk((uintptr_t)old_ptr);
+  CHECK(old_chunk->chunk_state == CHUNK_ALLOCATED);
+  size_t bytes_to_copy = Min(new_size, old_chunk->used_size);
+
+  // Always move to a fresh chunk; on failure the old block is left alone.
+  uint8_t *new_ptr = Allocate(0, new_size, stack);
+  if (!new_ptr) return NULL;
+  CHECK(REAL(memcpy) != NULL);
+  REAL(memcpy)(new_ptr, old_ptr, bytes_to_copy);
+  Deallocate(old_ptr, stack);
+  return new_ptr;
+}
+
+}  // namespace __asan
+
+// Malloc hooks declaration.
+// ASAN_NEW_HOOK(ptr, size) is called immediately after
+//   allocation of "size" bytes, which returned "ptr".
+// ASAN_DELETE_HOOK(ptr) is called immediately before
+//   deallocation of "ptr".
+// If ASAN_NEW_HOOK or ASAN_DELETE_HOOK is defined, user
+// program must provide implementation of this hook.
+// If macro is undefined, the hook is no-op.
+#ifdef ASAN_NEW_HOOK
+extern "C" void ASAN_NEW_HOOK(void *ptr, size_t size);
+#else
+static inline void ASAN_NEW_HOOK(void *ptr, size_t size) { }
+#endif
+
+#ifdef ASAN_DELETE_HOOK
+extern "C" void ASAN_DELETE_HOOK(void *ptr);
+#else
+static inline void ASAN_DELETE_HOOK(void *ptr) { }
+#endif
+
+namespace __asan {
+
+void *asan_memalign(size_t alignment, size_t size, AsanStackTrace *stack) {
+  uint8_t *mem = Allocate(alignment, size, stack);
+  ASAN_NEW_HOOK(mem, size);  // Runs even when the allocation failed.
+  return mem;
+}
+
+void asan_free(void *ptr, AsanStackTrace *stack) {
+  ASAN_DELETE_HOOK(ptr);  // The hook sees the pointer before it is released.
+  Deallocate(static_cast<uint8_t*>(ptr), stack);
+}
+
+void *asan_malloc(size_t size, AsanStackTrace *stack) {
+  uint8_t *mem = Allocate(0, size, stack);  // 0: no alignment request.
+  ASAN_NEW_HOOK(mem, size);
+  return mem;
+}
+
+void *asan_calloc(size_t nmemb, size_t size, AsanStackTrace *stack) {
+  if (size && nmemb > (size_t)-1 / size) return NULL;  // nmemb * size overflows
+  void *ptr = (void*)Allocate(0, nmemb * size, stack);
+  if (ptr) REAL(memset)(ptr, 0, nmemb * size);  // calloc memory reads as zero.
+  ASAN_NEW_HOOK(ptr, nmemb * size);
+  return ptr;
+}
+
+void *asan_realloc(void *p, size_t size, AsanStackTrace *stack) {
+  if (p != NULL && size != 0)  // The ordinary resize; no hooks run here.
+    return Reallocate((uint8_t*)p, size, stack);
+  if (p != NULL) {  // realloc(p, 0) behaves like free(p).
+    ASAN_DELETE_HOOK(p);
+    Deallocate((uint8_t*)p, stack);
+    return NULL;
+  }
+  void *fresh = (void*)Allocate(0, size, stack);  // Like malloc(size).
+  ASAN_NEW_HOOK(fresh, size);
+  return fresh;
+}
+
+void *asan_valloc(size_t size, AsanStackTrace *stack) {
+  uint8_t *mem = Allocate(kPageSize, size, stack);  // Page-aligned block.
+  ASAN_NEW_HOOK(mem, size);
+  return mem;
+}
+
+void *asan_pvalloc(size_t size, AsanStackTrace *stack) {
+  // pvalloc(0) should allocate one page.
+  size_t rounded = size ? RoundUpTo(size, kPageSize) : kPageSize;
+  // If rounding up wrapped around, keep the huge size so that Allocate()
+  // fails instead of silently handing out a single page.
+  if (rounded < size) rounded = size;
+  void *ptr = (void*)Allocate(kPageSize, rounded, stack);
+  ASAN_NEW_HOOK(ptr, rounded);
+  return ptr;
+}
+
+int asan_posix_memalign(void **memptr, size_t alignment, size_t size,
+                          AsanStackTrace *stack) {
+  void *ptr = Allocate(alignment, size, stack);  // NULL for oversized sizes.
+  CHECK(IsAligned((uintptr_t)ptr, alignment));
+  ASAN_NEW_HOOK(ptr, size);
+  *memptr = ptr;  // NOTE(review): ptr may be NULL here, yet 0 is returned.
+  return 0;
+}
+
+size_t asan_malloc_usable_size(void *ptr, AsanStackTrace *stack) {
+  CHECK(stack);
+  if (!ptr) return 0;
+  const size_t usable_size = malloc_info.AllocationSize((uintptr_t)ptr);
+  if (usable_size != 0) return usable_size;
+  // Not a live ASan allocation: report it, describe the address and die.
+  Report("ERROR: AddressSanitizer attempting to call malloc_usable_size() "
+         "for pointer which is not owned: %p\n", ptr);
+  stack->PrintStack();
+  Describe((uintptr_t)ptr, 1);
+  ShowStatsAndAbort();
+  return usable_size;
+}
+
+size_t asan_mz_size(const void *ptr) {  // 0 unless ptr is a live allocation.
+  return malloc_info.AllocationSize((uintptr_t)ptr);
+}
+
+void DescribeHeapAddress(uintptr_t addr, size_t access_size) {
+  Describe(addr, access_size);  // Prints chunk bounds and alloc/free stacks.
+}
+
+void asan_mz_force_lock() {
+  malloc_info.ForceLock();  // Stays locked until asan_mz_force_unlock().
+}
+
+void asan_mz_force_unlock() {
+  malloc_info.ForceUnlock();  // Releases the global allocator lock.
+}
+
+// ---------------------- Fake stack-------------------- {{{1
+FakeStack::FakeStack() {
+  CHECK(REAL(memset) != NULL);
+  REAL(memset)(this, 0, sizeof(*this));  // Zeroes every member at once.
+}
+
+bool FakeStack::AddrIsInSizeClass(uintptr_t addr, size_t size_class) {
+  const uintptr_t region_beg = allocated_size_classes_[size_class];
+  const uintptr_t region_end = region_beg + ClassMmapSize(size_class);
+  // A zero base means this class has no mapping.
+  return region_beg != 0 && addr >= region_beg && addr < region_end;
+}
+
+uintptr_t FakeStack::AddrIsInFakeStack(uintptr_t addr) {
+  // Yields the base of the first mapped class region holding addr, else 0.
+  size_t cls = 0;
+  while (cls < kNumberOfSizeClasses && !AddrIsInSizeClass(addr, cls)) cls++;
+  return cls < kNumberOfSizeClasses ? allocated_size_classes_[cls] : 0;
+}
+
+// We may want to compute this during compilation.
+inline size_t FakeStack::ComputeSizeClass(size_t alloc_size) {
+  size_t rounded_size = RoundUpToPowerOfTwo(alloc_size);
+  size_t log = Log2(rounded_size);
+  CHECK(alloc_size <= (1UL << log));
+  if (!(alloc_size > (1UL << (log-1)))) {  // Log details before the CHECK.
+    Printf("alloc_size %zu log %zu\n", alloc_size, log);
+  }
+  CHECK(alloc_size > (1UL << (log-1)));
+  size_t res = log < kMinStackFrameSizeLog ? 0 : log - kMinStackFrameSizeLog;
+  CHECK(res < kNumberOfSizeClasses);
+  CHECK(ClassSize(res) >= rounded_size);
+  return res;  // Frames below the minimum size all share class 0.
+}
+
+void FakeFrameFifo::FifoPush(FakeFrame *node) {
+  CHECK(node);
+  node->next = 0;  // The new node becomes the tail.
+  if (first_ || last_) {  // Non-empty; both ends must then be set.
+    CHECK(first_);
+    CHECK(last_);
+    last_->next = node;
+  } else {
+    first_ = node;
+  }
+  last_ = node;
+}
+
+FakeFrame *FakeFrameFifo::FifoPop() {
+  CHECK(first_ && last_ && "Exhausted fake stack");
+  FakeFrame *head = first_;
+  if (head != last_) {
+    // More nodes remain: just advance the head.
+    first_ = head->next;
+  } else {
+    // That was the only node; the list is empty now.
+    first_ = last_ = 0;
+  }
+  return head;
+}
+
+void FakeStack::Init(size_t stack_size) {
+  stack_size_ = stack_size;  // Determines ClassMmapSize().
+  alive_ = true;  // From now on AllocateStack() hands out fake frames.
+}
+
+void FakeStack::Cleanup() {
+  alive_ = false;  // AllocateStack() now returns the real stack.
+  for (size_t i = 0; i < kNumberOfSizeClasses; i++) {
+    uintptr_t mem = allocated_size_classes_[i];
+    if (mem) {
+      PoisonShadow(mem, ClassMmapSize(i), 0);  // Reset shadow before unmap.
+      allocated_size_classes_[i] = 0;
+      AsanUnmapOrDie((void*)mem, ClassMmapSize(i));
+    }
+  }
+}
+
+size_t FakeStack::ClassMmapSize(size_t size_class) {
+  return RoundUpToPowerOfTwo(stack_size_);  // Same for every size_class.
+}
+
+void FakeStack::AllocateOneSizeClass(size_t size_class) {
+  CHECK(ClassMmapSize(size_class) >= kPageSize);
+  uintptr_t new_mem = (uintptr_t)AsanMmapSomewhereOrDie(
+      ClassMmapSize(size_class), __FUNCTION__);
+  // Printf("T%d new_mem[%zu]: %p-%p mmap %zu\n",
+  //       asanThreadRegistry().GetCurrent()->tid(),
+  //       size_class, new_mem, new_mem + ClassMmapSize(size_class),
+  //       ClassMmapSize(size_class));
+  size_t i;
+  for (i = 0; i < ClassMmapSize(size_class);
+       i += ClassSize(size_class)) {
+    size_classes_[size_class].FifoPush((FakeFrame*)(new_mem + i));
+  }
+  CHECK(i == ClassMmapSize(size_class));  // Frames must tile the mapping.
+  allocated_size_classes_[size_class] = new_mem;  // Marks the class as mapped.
+}
+
+uintptr_t FakeStack::AllocateStack(size_t size, size_t real_stack) {
+  if (!alive_) return real_stack;  // Fake stack is off: use the real one.
+  CHECK(size <= kMaxStackMallocSize && size > 1);
+  size_t size_class = ComputeSizeClass(size);
+  if (!allocated_size_classes_[size_class]) {
+    AllocateOneSizeClass(size_class);  // Map the class on first use.
+  }
+  FakeFrame *fake_frame = size_classes_[size_class].FifoPop();
+  CHECK(fake_frame);
+  fake_frame->size_minus_one = size - 1;
+  fake_frame->real_stack = real_stack;
+  while (FakeFrame *top = call_stack_.top()) {  // Pop frames <= real_stack.
+    if (top->real_stack > real_stack) break;
+    call_stack_.LifoPop();
+    DeallocateFrame(top);
+  }
+  call_stack_.LifoPush(fake_frame);
+  uintptr_t ptr = (uintptr_t)fake_frame;
+  PoisonShadow(ptr, size, 0);  // 0: presumably marks the frame addressable.
+  return ptr;
+}
+
+void FakeStack::DeallocateFrame(FakeFrame *fake_frame) {
+  CHECK(alive_);
+  size_t size = fake_frame->size_minus_one + 1;
+  size_t size_class = ComputeSizeClass(size);
+  CHECK(allocated_size_classes_[size_class]);
+  uintptr_t ptr = (uintptr_t)fake_frame;
+  CHECK(AddrIsInSizeClass(ptr, size_class));  // First byte of the frame.
+  CHECK(AddrIsInSizeClass(ptr + size - 1, size_class));  // Last byte.
+  size_classes_[size_class].FifoPush(fake_frame);  // Queue it for reuse.
+}
+
+void FakeStack::OnFree(size_t ptr, size_t size, size_t real_stack) {
+  FakeFrame *fake_frame = (FakeFrame*)ptr;  // real_stack is unused here.
+  CHECK(fake_frame->magic == kRetiredStackFrameMagic);  // Compare, not assign.
+  CHECK(fake_frame->descr != 0);
+  CHECK(fake_frame->size_minus_one == size - 1);
+  PoisonShadow(ptr, size, kAsanStackAfterReturnMagic);  // Poison whole frame.
+}
+
+}  // namespace __asan
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+size_t __asan_stack_malloc(size_t size, size_t real_stack) {
+  // Returns the address the frame should live at: a fake frame when one can
+  // be provided, otherwise 'real_stack' unchanged.
+  if (!FLAG_use_fake_stack) return real_stack;
+  AsanThread *thread = asanThreadRegistry().GetCurrent();
+  // Without a current thread (TSD is gone) fall back to the real stack.
+  if (!thread) return real_stack;
+  // AllocateStack() itself returns 'real_stack' once the fake stack is
+  // no longer alive.
+  return thread->fake_stack().AllocateStack(size, real_stack);
+}
+
+void __asan_stack_free(size_t ptr, size_t size, size_t real_stack) {
+  if (!FLAG_use_fake_stack) return;
+  if (ptr != real_stack) {  // Equal values mean no fake frame was handed out.
+    FakeStack::OnFree(ptr, size, real_stack);
+  }
+}
+
+// ASan allocator doesn't reserve extra bytes, so normally we would
+// just return "size".
+size_t __asan_get_estimated_allocated_size(size_t size) {
+  // A zero-byte request counts as one byte, as in Allocate().
+  return size ? Min(size, kMaxAllowedMallocSize) : 1;
+}
+
+bool __asan_get_ownership(const void *p) {  // True only for live allocations.
+  return malloc_info.AllocationSize((uintptr_t)p) > 0;
+}
+
+size_t __asan_get_allocated_size(const void *p) {
+  if (!p) return 0;
+  const size_t allocated_size = malloc_info.AllocationSize((uintptr_t)p);
+  if (allocated_size != 0) return allocated_size;
+  // p is not malloced or is already freed: report the misuse and die.
+  // The stack printed is that of the current caller.
+  Report("ERROR: AddressSanitizer attempting to call "
+         "__asan_get_allocated_size() for pointer which is "
+         "not owned: %p\n", p);
+  PRINT_CURRENT_STACK();
+  Describe((uintptr_t)p, 1);
+  ShowStatsAndAbort();
+  return allocated_size;
+}
diff --git a/lib/asan/asan_allocator.h b/lib/asan/asan_allocator.h
new file mode 100644
index 0000000..cc6ac84
--- /dev/null
+++ b/lib/asan/asan_allocator.h
@@ -0,0 +1,159 @@
+//===-- asan_allocator.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan-private header for asan_allocator.cc.
+//===----------------------------------------------------------------------===//
+
+#ifndef ASAN_ALLOCATOR_H
+#define ASAN_ALLOCATOR_H
+
+#include "asan_internal.h"
+#include "asan_interceptors.h"
+
+namespace __asan {
+
+static const size_t kNumberOfSizeClasses = 255;
+struct AsanChunk;
+
+class AsanChunkFifoList {
+ public:
+  explicit AsanChunkFifoList(LinkerInitialized) { }  // Members left as is.
+  AsanChunkFifoList() { clear(); }
+  void Push(AsanChunk *n);
+  void PushList(AsanChunkFifoList *q);
+  AsanChunk *Pop();
+  size_t size() { return size_; }  // Sum of chunk sizes, not a node count.
+  void clear() {
+    first_ = last_ = NULL;
+    size_ = 0;
+  }
+ private:
+  AsanChunk *first_;  // Head: the next chunk Pop() returns.
+  AsanChunk *last_;  // Tail: where Push() appends.
+  size_t size_;  // Total Size() of all linked chunks.
+};
+
+struct AsanThreadLocalMallocStorage {
+  explicit AsanThreadLocalMallocStorage(LinkerInitialized x)
+      : quarantine_(x) { }  // free_lists_ is left untouched.
+  AsanThreadLocalMallocStorage() {
+    CHECK(REAL(memset));
+    REAL(memset)(this, 0, sizeof(AsanThreadLocalMallocStorage));
+  }
+
+  AsanChunkFifoList quarantine_;  // Chunks freed by this thread.
+  AsanChunk *free_lists_[kNumberOfSizeClasses];  // Per-class chunk cache.
+  void CommitBack();  // Hands both back to the global MallocInfo.
+};
+
+// Fake stack frame contains local variables of one function.
+// This struct should fit into a stack redzone (32 bytes).
+struct FakeFrame {
+  uintptr_t magic;  // Modified by the instrumented code.
+  uintptr_t descr;  // Modified by the instrumented code.
+  FakeFrame *next;  // Link used by FakeFrameFifo and FakeFrameLifo.
+  uint64_t real_stack     : 48;  // Real stack address given to AllocateStack.
+  uint64_t size_minus_one : 16;  // Frame size - 1, so 64K fits in 16 bits.
+};
+
+struct FakeFrameFifo {
+ public:
+  void FifoPush(FakeFrame *node);  // Appends node at the tail.
+  FakeFrame *FifoPop();  // Removes the head; CHECK-fails when empty.
+ private:
+  FakeFrame *first_, *last_;  // No constructor; FakeStack() zeroes them.
+};
+
+class FakeFrameLifo {
+ public:
+  void LifoPush(FakeFrame *node) {  // node becomes the new top.
+    node->next = top_;
+    top_ = node;
+  }
+  void LifoPop() {  // Drops the top frame; the list must not be empty.
+    CHECK(top_);
+    top_ = top_->next;
+  }
+  FakeFrame *top() { return top_; }  // NULL when empty (see AllocateStack).
+ private:
+  FakeFrame *top_;  // No constructor: relies on the owner zeroing it.
+};
+
+// For each thread we create a fake stack and place stack objects on this fake
+// stack instead of the real stack. The fake stack is not really a stack but
+// a fast malloc-like allocator so that when a function exits the fake stack
+// is not popped but remains there for quite some time until gets used again.
+// So, we poison the objects on the fake stack when function returns.
+// It helps us find use-after-return bugs.
+// We can not rely on __asan_stack_free being called on every function exit,
+// so we maintain a lifo list of all current fake frames and update it on every
+// call to __asan_stack_malloc.
+class FakeStack {
+ public:
+  FakeStack();
+  explicit FakeStack(LinkerInitialized) {}
+  void Init(size_t stack_size);
+  void StopUsingFakeStack() { alive_ = false; }
+  void Cleanup();
+  uintptr_t AllocateStack(size_t size, size_t real_stack);
+  static void OnFree(size_t ptr, size_t size, size_t real_stack);
+  // Return the bottom of the mapped region.
+  uintptr_t AddrIsInFakeStack(uintptr_t addr);
+  size_t StackSize() { return stack_size_; }  // The value passed to Init().
+ private:
+  static const size_t kMinStackFrameSizeLog = 9;  // Min frame is 512B.
+  static const size_t kMaxStackFrameSizeLog = 16;  // Max stack frame is 64K.
+  static const size_t kMaxStackMallocSize = 1 << kMaxStackFrameSizeLog;
+  static const size_t kNumberOfSizeClasses =
+      kMaxStackFrameSizeLog - kMinStackFrameSizeLog + 1;
+
+  bool AddrIsInSizeClass(uintptr_t addr, size_t size_class);
+
+  // Each size class should be large enough to hold all frames.
+  size_t ClassMmapSize(size_t size_class);
+
+  size_t ClassSize(size_t size_class) {  // Frame size of the class, in bytes.
+    return 1UL << (size_class + kMinStackFrameSizeLog);
+  }
+
+  void DeallocateFrame(FakeFrame *fake_frame);
+
+  size_t ComputeSizeClass(size_t alloc_size);
+  void AllocateOneSizeClass(size_t size_class);
+
+  size_t stack_size_;  // Set by Init(); drives ClassMmapSize().
+  bool   alive_;  // False: AllocateStack() returns the real stack address.
+
+  uintptr_t allocated_size_classes_[kNumberOfSizeClasses];  // Base, 0 if none.
+  FakeFrameFifo size_classes_[kNumberOfSizeClasses];  // Free frames per class.
+  FakeFrameLifo call_stack_;  // Frames currently considered live.
+};
+
+void *asan_memalign(size_t alignment, size_t size, AsanStackTrace *stack);
+void asan_free(void *ptr, AsanStackTrace *stack);
+
+void *asan_malloc(size_t size, AsanStackTrace *stack);
+void *asan_calloc(size_t nmemb, size_t size, AsanStackTrace *stack);
+void *asan_realloc(void *p, size_t size, AsanStackTrace *stack);
+void *asan_valloc(size_t size, AsanStackTrace *stack);
+void *asan_pvalloc(size_t size, AsanStackTrace *stack);
+
+int asan_posix_memalign(void **memptr, size_t alignment, size_t size,
+                          AsanStackTrace *stack);
+size_t asan_malloc_usable_size(void *ptr, AsanStackTrace *stack);
+
+size_t asan_mz_size(const void *ptr);
+void asan_mz_force_lock();
+void asan_mz_force_unlock();
+void DescribeHeapAddress(uintptr_t addr, size_t access_size);
+
+}  // namespace __asan
+#endif  // ASAN_ALLOCATOR_H
diff --git a/lib/asan/asan_globals.cc b/lib/asan/asan_globals.cc
new file mode 100644
index 0000000..427df14
--- /dev/null
+++ b/lib/asan/asan_globals.cc
@@ -0,0 +1,171 @@
+//===-- asan_globals.cc -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Handle globals.
+//===----------------------------------------------------------------------===//
+#include "asan_interceptors.h"
+#include "asan_interface.h"
+#include "asan_internal.h"
+#include "asan_lock.h"
+#include "asan_mapping.h"
+#include "asan_stack.h"
+#include "asan_stats.h"
+#include "asan_thread.h"
+
+#include <ctype.h>
+
+namespace __asan {
+
+typedef __asan_global Global;
+
+struct ListOfGlobals {  // Singly-linked list node, one per registered global.
+  const Global *g;  // Descriptor stored by RegisterGlobal().
+  ListOfGlobals *next;  // Next node; the list walk stops at a null pointer.
+};
+
+static AsanLock mu_for_globals(LINKER_INITIALIZED);
+static ListOfGlobals *list_of_globals;
+static LowLevelAllocator allocator_for_globals(LINKER_INITIALIZED);
+
+void PoisonRedZones(const Global &g) {
+  // Poisons the right redzone of g; it must span 1, 2 or 4 shadow bytes.
+  const size_t rz_shadow_bytes = kGlobalAndStackRedzone >> SHADOW_SCALE;
+  CHECK(rz_shadow_bytes == 1 || rz_shadow_bytes == 2 || rz_shadow_bytes == 4);
+  const size_t rounded_up = kGlobalAndStackRedzone *
+      ((g.size + kGlobalAndStackRedzone - 1) / kGlobalAndStackRedzone);
+  // Full right redzone, starting at the size rounded up to a redzone multiple.
+  PoisonShadow(g.beg + rounded_up,
+               kGlobalAndStackRedzone, kAsanGlobalRedzoneMagic);
+  const size_t tail_bytes = g.size % kGlobalAndStackRedzone;
+  if (tail_bytes == 0) return;  // Size is a redzone multiple: nothing more.
+  // Partial right redzone for the last, only partly used chunk of g.
+  const uint64_t rounded_down = kGlobalAndStackRedzone *
+      (g.size / kGlobalAndStackRedzone);
+  CHECK(rounded_down == rounded_up - kGlobalAndStackRedzone);
+  PoisonShadowPartialRightRedzone(g.beg + rounded_down, tail_bytes,
+                                  kGlobalAndStackRedzone,
+                                  kAsanGlobalRedzoneMagic);
+}
+
+static size_t GetAlignedSize(size_t size) {  // Rounds up to a redzone multiple.
+  size_t chunks = (size + kGlobalAndStackRedzone - 1) / kGlobalAndStackRedzone;
+  return chunks * kGlobalAndStackRedzone;
+}
+
+  // Check if the global is a zero-terminated ASCII string. If so, print it.
+void PrintIfASCII(const Global &g) {
+  const char *str = (const char*)g.beg;  // Typed pointer, also used for '%s'.
+  if (g.size == 0 || str[g.size - 1] != 0) return;  // size - 1 must not wrap.
+  for (size_t i = 0; i + 1 < g.size; i++)
+    if (!isascii(str[i])) return;
+  Printf("  '%s' is ascii string '%s'\n", g.name, str);
+}
+
+bool DescribeAddrIfMyRedZone(const Global &g, uintptr_t addr) {
+  // Only [beg - redzone, beg + size_with_redzone) is attributed to g.
+  const bool below = addr < g.beg - kGlobalAndStackRedzone;
+  if (below || addr >= g.beg + g.size_with_redzone) return false;
+  Printf("%p is located ", addr);
+  if (addr < g.beg)
+    Printf("%zd bytes to the left", g.beg - addr);
+  else if (addr >= g.beg + g.size)
+    Printf("%zd bytes to the right", addr - (g.beg + g.size));
+  else
+    Printf("%zd bytes inside", addr - g.beg);  // Can it happen?
+  Printf(" of global variable '%s' (0x%zx) of size %zu\n",
+         g.name, g.beg, g.size);
+  PrintIfASCII(g);
+  return true;
+}
+
+
+bool DescribeAddrIfGlobal(uintptr_t addr) {
+  if (!FLAG_report_globals) return false;
+  ScopedLock lock(&mu_for_globals);
+  bool described = false;
+  // No early exit: every registered global gets a chance to describe addr.
+  for (ListOfGlobals *node = list_of_globals; node; node = node->next) {
+    if (FLAG_report_globals >= 2)
+      Printf("Search Global: beg=%p size=%zu name=%s\n",
+             node->g->beg, node->g->size, node->g->name);
+    if (DescribeAddrIfMyRedZone(*node->g, addr)) described = true;
+  }
+  return described;
+}
+
+// Register a global variable: poison its redzones and remember it.
+// This function may be called more than once for every global;
+// each call prepends a new node to list_of_globals (no deduplication).
+static void RegisterGlobal(const Global *g) {
+  CHECK(asan_inited);
+  CHECK(FLAG_report_globals);
+  CHECK(AddrIsInMem(g->beg));
+  CHECK(AddrIsAlignedByGranularity(g->beg));
+  CHECK(AddrIsAlignedByGranularity(g->size_with_redzone));
+  PoisonRedZones(*g);
+  ListOfGlobals *node =
+      (ListOfGlobals*)allocator_for_globals.Allocate(sizeof(ListOfGlobals));
+  node->g = g;
+  node->next = list_of_globals;
+  list_of_globals = node;
+  if (FLAG_report_globals >= 2)
+    Report("Added Global: beg=%p size=%zu name=%s\n",
+           g->beg, g->size, g->name);
+}
+
+static void UnregisterGlobal(const Global *g) {  // Unpoisons g's shadow memory.
+  CHECK(asan_inited);
+  CHECK(FLAG_report_globals);
+  CHECK(AddrIsInMem(g->beg));
+  CHECK(AddrIsAlignedByGranularity(g->beg));
+  CHECK(AddrIsAlignedByGranularity(g->size_with_redzone));
+  PoisonShadow(g->beg, g->size_with_redzone, 0);  // 0 replaces the magic value.
+  // We unpoison the shadow memory for the global but we do not remove it from
+  // the list because that would require O(n^2) time with the current list
+  // implementation. It might not be worth doing anyway.
+}
+
+}  // namespace __asan
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+// Register one global with a default redzone. The descriptor is allocated
+// from allocator_for_globals and filled in from the arguments.
+void __asan_register_global(uintptr_t addr, size_t size, const char *name) {
+  if (!FLAG_report_globals) return;
+  ScopedLock lock(&mu_for_globals);
+  Global *desc = (Global *)allocator_for_globals.Allocate(sizeof(Global));
+  desc->name = name;
+  desc->beg = addr;
+  desc->size = size;
+  desc->size_with_redzone = GetAlignedSize(size) + kGlobalAndStackRedzone;
+  RegisterGlobal(desc);
+}
+
+// Register an array of n globals under a single acquisition of the lock.
+// Does nothing when FLAG_report_globals is zero.
+void __asan_register_globals(__asan_global *globals, size_t n) {
+  if (!FLAG_report_globals) return;
+  ScopedLock lock(&mu_for_globals);
+  for (size_t idx = 0; idx != n; ++idx)
+    RegisterGlobal(globals + idx);
+}
+
+// Unregister an array of n globals under a single acquisition of the lock.
+// We must do it when a shared object gets dlclosed.
+// Does nothing when FLAG_report_globals is zero.
+void __asan_unregister_globals(__asan_global *globals, size_t n) {
+  if (!FLAG_report_globals) return;
+  ScopedLock lock(&mu_for_globals);
+  for (size_t idx = 0; idx != n; ++idx)
+    UnregisterGlobal(globals + idx);
+}
diff --git a/lib/asan/asan_interceptors.cc b/lib/asan/asan_interceptors.cc
new file mode 100644
index 0000000..6d7e63b
--- /dev/null
+++ b/lib/asan/asan_interceptors.cc
@@ -0,0 +1,850 @@
+//===-- asan_interceptors.cc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Intercept various libc functions.
+//===----------------------------------------------------------------------===//
+#include "asan_interceptors.h"
+
+#include "asan_allocator.h"
+#include "asan_interface.h"
+#include "asan_internal.h"
+#include "asan_mapping.h"
+#include "asan_stack.h"
+#include "asan_stats.h"
+#include "asan_thread_registry.h"
+#include "interception/interception.h"
+
+// Use macro to describe if specific function should be
+// intercepted on a given platform.
+#if !defined(_WIN32)
+# define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 1
+#else
+# define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 0
+#endif
+
+#if !defined(__APPLE__)
+# define ASAN_INTERCEPT_STRNLEN 1
+#else
+# define ASAN_INTERCEPT_STRNLEN 0
+#endif
+
+#if defined(ANDROID) || defined(_WIN32)
+# define ASAN_INTERCEPT_SIGNAL_AND_SIGACTION 0
+#else
+# define ASAN_INTERCEPT_SIGNAL_AND_SIGACTION 1
+#endif
+
+// Use extern declarations of intercepted functions on Mac and Windows
+// to avoid including system headers.
+#if defined(__APPLE__) || (defined(_WIN32) && !defined(_DLL))
+extern "C" {
+// signal.h
+# if ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
+struct sigaction;
+int sigaction(int sig, const struct sigaction *act,
+              struct sigaction *oldact);
+void *signal(int signum, void *handler);
+# endif
+
+// setjmp.h
+void longjmp(void* env, int value);
+# if !defined(_WIN32)
+void _longjmp(void *env, int value);
+# endif
+
+// string.h / strings.h
+int memcmp(const void *a1, const void *a2, size_t size);
+void* memmove(void *to, const void *from, size_t size);
+void* memcpy(void *to, const void *from, size_t size);
+void* memset(void *block, int c, size_t size);
+char* strchr(const char *str, int c);
+# if defined(__APPLE__)
+char* index(const char *string, int c);
+# endif
+char* strcat(char *to, const char* from);  // NOLINT
+char* strcpy(char *to, const char* from);  // NOLINT
+char* strncpy(char *to, const char* from, size_t size);
+int strcmp(const char *s1, const char* s2);
+int strncmp(const char *s1, const char* s2, size_t size);
+# if !defined(_WIN32)
+int strcasecmp(const char *s1, const char *s2);
+int strncasecmp(const char *s1, const char *s2, size_t n);
+char* strdup(const char *s);
+# endif
+size_t strlen(const char *s);
+# if ASAN_INTERCEPT_STRNLEN
+size_t strnlen(const char *s, size_t maxlen);
+# endif
+
+// stdlib.h
+int atoi(const char *nptr);
+long atol(const char *nptr);  // NOLINT
+long strtol(const char *nptr, char **endptr, int base);  // NOLINT
+# if ASAN_INTERCEPT_ATOLL_AND_STRTOLL
+long long atoll(const char *nptr);  // NOLINT
+long long strtoll(const char *nptr, char **endptr, int base);  // NOLINT
+# endif
+
+// Windows threads.
+# if defined(_WIN32)
+__declspec(dllimport)
+void* __stdcall CreateThread(void *sec, size_t st, void* start,
+                             void *arg, DWORD fl, DWORD *id);
+# endif
+
+// Posix threads.
+# if !defined(_WIN32)
+int pthread_create(void *thread, void *attr, void *(*start_routine)(void*),
+                   void *arg);
+# endif
+}  // extern "C"
+#endif
+
+namespace __asan {
+
+// Instruments read/write access to a single byte in memory. On error calls
+// __asan_report_error, which aborts the program. |address| is expanded twice.
+#define ACCESS_ADDRESS(address, isWrite)   do {         \
+  if (AddressIsPoisoned(address)) {                     \
+    GET_CURRENT_PC_BP_SP;                               \
+    __asan_report_error(pc, bp, sp, address, isWrite, /* access_size */ 1); \
+  } \
+} while (0)
+
+// We implement ACCESS_MEMORY_RANGE, ASAN_READ_RANGE,
+// and ASAN_WRITE_RANGE as macro instead of function so
+// that no extra frames are created, and stack trace contains
+// relevant information only.
+
+// Instruments read/write access to a memory range. For now only the first and
+// the last byte of the range are checked. |size| is parenthesized and the
+// local has a macro-specific name so that it cannot capture a caller's 'ptr'.
+#define ACCESS_MEMORY_RANGE(offset, size, isWrite) do { \
+  if ((size) > 0) { \
+    uintptr_t range_beg_ = (uintptr_t)(offset); \
+    ACCESS_ADDRESS(range_beg_, isWrite); \
+    ACCESS_ADDRESS(range_beg_ + (size) - 1, isWrite); \
+  } \
+} while (0)
+
+// Checks a read of |size| bytes at |offset| via ACCESS_MEMORY_RANGE.
+#define ASAN_READ_RANGE(offset, size) \
+  ACCESS_MEMORY_RANGE(offset, size, false)
+
+// Checks a write of |size| bytes at |offset| via ACCESS_MEMORY_RANGE.
+#define ASAN_WRITE_RANGE(offset, size) \
+  ACCESS_MEMORY_RANGE(offset, size, true)
+
+// memcpy/strcpy-like functions must not get overlapping ranges: report it.
+// Two ranges overlap iff each one ends strictly after the other one begins.
+static inline bool RangesOverlap(const char *offset1, size_t length1,
+                                 const char *offset2, size_t length2) {
+  return (offset2 < offset1 + length1) && (offset1 < offset2 + length2);
+}
+// A macro (not a function) is used to avoid creation of new frames.
+#define CHECK_RANGES_OVERLAP(name, _offset1, length1, _offset2, length2) do { \
+  const char *offset1 = (const char*)_offset1; \
+  const char *offset2 = (const char*)_offset2; \
+  if (RangesOverlap(offset1, length1, offset2, length2)) { \
+    Report("ERROR: AddressSanitizer %s-param-overlap: " \
+           "memory ranges [%p,%p) and [%p, %p) overlap\n", \
+           name, offset1, offset1 + length1, offset2, offset2 + length2); \
+    PRINT_CURRENT_STACK(); \
+    ShowStatsAndAbort(); \
+  } \
+} while (0)
+
+#define ENSURE_ASAN_INITED() do { /* Runs __asan_init() if not inited yet. */ \
+  CHECK(!asan_init_is_running); /* Must not be reached during init itself. */ \
+  if (!asan_inited) { \
+    __asan_init(); \
+  } \
+} while (0)
+
+static inline bool IsSpace(int c) {
+  // '\t', '\n', '\v', '\f' and '\r' are the contiguous ASCII codes 9..13.
+  return (c == ' ') || (c >= '\t' && c <= '\r');
+}
+
+static inline bool IsDigit(int c) {  // True for '0'..'9' only.
+  return (unsigned)c - (unsigned)'0' <= 9u;
+}
+
+static inline int ToLower(int c) {  // ASCII only; other values pass through.
+  return (c < 'A' || c > 'Z') ? c : c + ('a' - 'A');
+}
+
+// ---------------------- Internal string functions ---------------- {{{1
+
+// Minimal strtoll(): accepts base 10 only, saturates at INT64_MIN/INT64_MAX
+// and never touches errno. When no digits are found, *endptr receives the
+// original nptr (as strtoll() does), not a pointer past the skipped blanks.
+int64_t internal_simple_strtoll(const char *nptr, char **endptr, int base) {
+  CHECK(base == 10);
+  char *const start = (char*)nptr;  // Reported back if nothing is converted.
+  while (IsSpace(*nptr)) nptr++;
+
+  // Optional sign.
+  bool negative = false;
+  if (*nptr == '+' || *nptr == '-') {
+    negative = (*nptr == '-');
+    nptr++;
+  }
+
+  // Accumulate the magnitude, sticking at UINT64_MAX once it would overflow.
+  uint64_t res = 0;
+  bool have_digits = false;
+  for (; IsDigit(*nptr); nptr++) {
+    const uint64_t digit = (uint64_t)(*nptr - '0');
+    res = (res <= UINT64_MAX / 10) ? res * 10 : UINT64_MAX;
+    res = (res <= UINT64_MAX - digit) ? res + digit : UINT64_MAX;
+    have_digits = true;
+  }
+  if (endptr != NULL) *endptr = have_digits ? (char*)nptr : start;
+
+  // Clamp magnitudes that do not fit into int64_t.
+  if (res > (uint64_t)INT64_MAX) return negative ? INT64_MIN : INT64_MAX;
+  return negative ? -(int64_t)res : (int64_t)res;
+}
+
+int64_t internal_atoll(const char *nptr) {  // Base-10 parse; end is discarded.
+  return internal_simple_strtoll(nptr, /* endptr */ NULL, /* base */ 10);
+}
+
+size_t internal_strlen(const char *s) {
+  const char *end = s;
+  while (*end != '\0') ++end;  // Stop at the terminating NUL.
+  return (size_t)(end - s);
+}
+
+size_t internal_strnlen(const char *s, size_t maxlen) {
+#if ASAN_INTERCEPT_STRNLEN
+  // Prefer the real strnlen once its address has been resolved.
+  if (REAL(strnlen) != NULL) return REAL(strnlen)(s, maxlen);
+#endif
+  size_t len = 0;
+  for (; len < maxlen; ++len)
+    if (s[len] == '\0') break;
+  return len;
+}
+
+// Returns the first position of (char)c in s, or NULL if s has none.
+char* internal_strchr(const char *s, int c) {
+  const char wanted = (char)c;
+  // The match is tested before the end-of-string check, so c == 0 finds the
+  // terminator itself.
+  for (; *s != wanted; s++)
+    if (*s == 0) return NULL;
+  return (char*)s;
+}
+
+// Like memchr(3): c and the bytes of s are compared as unsigned char.
+void* internal_memchr(const void* s, int c, size_t n) {
+  const unsigned char *t = (const unsigned char*)s;
+  for (size_t i = 0; i < n; ++i, ++t)
+    if (*t == (unsigned char)c) return (void*)t;
+  return NULL;
+}
+
+// Like memcmp(3): bytes are compared as unsigned char, so 0x80 > 0x7f.
+int internal_memcmp(const void* s1, const void* s2, size_t n) {
+  const unsigned char *t1 = (const unsigned char*)s1;
+  const unsigned char *t2 = (const unsigned char*)s2;
+  for (size_t i = 0; i < n; ++i, ++t1, ++t2)
+    if (*t1 != *t2) return *t1 < *t2 ? -1 : 1;
+  return 0;
+}
+
+// Should not be used in performance-critical places.
+void* internal_memset(void* s, int c, size_t n) {
+  // Writing through a volatile pointer prevents Clang from turning the loop
+  // below into a call to memset().
+  // FIXME: building the runtime with -ffreestanding is a better idea. However
+  // there currently are linktime problems due to PR12396.
+  char volatile *dst = (char*)s;
+  for (size_t left = n; left != 0; --left) {
+    *dst++ = c;
+  }
+  return s;
+}
+
+char *internal_strstr(const char *haystack, const char *needle) {
+  // This is O(N^2), but we are not using it in hot places.
+  const size_t hay_len = internal_strlen(haystack);
+  const size_t needle_len = internal_strlen(needle);
+  if (needle_len > hay_len) return 0;
+  // Only start positions that leave room for the whole needle are tried.
+  const char *const last = haystack + (hay_len - needle_len);
+  for (const char *p = haystack; p <= last; p++)
+    if (internal_memcmp(p, needle, needle_len) == 0) return (char*)p;
+  return 0;
+}
+
+// Appends at most n characters of src to dst and always terminates the result.
+char *internal_strncat(char *dst, const char *src, size_t n) {
+  char *out = dst + internal_strlen(dst);  // Start at dst's terminator.
+  for (size_t left = n; left != 0 && *src; left--)
+    *out++ = *src++;
+  *out = 0;
+  return dst;
+}
+
+// Returns -1, 0 or 1; differing characters are compared as unsigned values.
+int internal_strcmp(const char *s1, const char *s2) {
+  // Skip the common prefix; equal strings end at the shared terminator.
+  for (; *s1 == *s2; s1++, s2++) {
+    if (*s1 == 0) return 0;
+  }
+  // The first mismatch decides the result.
+  const unsigned c1 = *s1;
+  const unsigned c2 = *s2;
+  return (c1 < c2) ? -1 : 1;
+}
+
+char *internal_strncpy(char *dst, const char *src, size_t n) {
+  size_t i = 0;
+  for (; i < n && src[i]; i++) dst[i] = src[i];
+  for (; i < n; i++) dst[i] = '\0';  // Like strncpy(3): zero-fill the tail.
+  return dst;
+}
+
+}  // namespace __asan
+
+// ---------------------- Wrappers ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+static thread_return_t THREAD_CALLING_CONV asan_thread_start(void *arg) {
+  AsanThread *self = (AsanThread*)arg;  // Passed in by the creating wrapper.
+  asanThreadRegistry().SetCurrent(self);
+  return self->ThreadStart();
+}
+
+#ifndef _WIN32
+INTERCEPTOR(int, pthread_create, void *thread,
+    void *attr, void *(*start_routine)(void*), void *arg) {
+  GET_STACK_TRACE_HERE(kStackTraceMax);  // Provides 'stack' used below.
+  const int tid = asanThreadRegistry().GetCurrentTidOrMinusOne();
+  AsanThread *child = AsanThread::Create(tid, start_routine, arg, &stack);
+  asanThreadRegistry().RegisterThread(child);
+  return REAL(pthread_create)(thread, attr, asan_thread_start, child);
+}
+#endif  // !_WIN32
+
+#if ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
+INTERCEPTOR(void*, signal, int signum, void *handler) {
+  // Handler changes for signals that ASan intercepts are dropped and NULL is
+  // returned; everything else goes to the real signal().
+  if (AsanInterceptsSignal(signum)) return NULL;
+  return REAL(signal)(signum, handler);
+}
+
+INTERCEPTOR(int, sigaction, int signum, const struct sigaction *act,
+                            struct sigaction *oldact) {
+  // For signals that ASan intercepts the request is dropped and 0 is
+  // returned; everything else goes to the real sigaction().
+  if (AsanInterceptsSignal(signum)) return 0;
+  return REAL(sigaction)(signum, act, oldact);
+}
+#elif ASAN_POSIX
+// We need to have defined REAL(sigaction) on posix systems.
+DEFINE_REAL(int, sigaction, int signum, const struct sigaction *act,
+    struct sigaction *oldact);
+#endif  // ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
+
+INTERCEPTOR(void, longjmp, void *env, int val) {
+  __asan_handle_no_return();  // Runs before the jump is actually taken.
+  REAL(longjmp)(env, val);  // Forwards to the real longjmp.
+}
+
+#if !defined(_WIN32)
+INTERCEPTOR(void, _longjmp, void *env, int val) {
+  __asan_handle_no_return();  // Runs before the jump is actually taken.
+  REAL(_longjmp)(env, val);  // Forwards to the real _longjmp.
+}
+
+INTERCEPTOR(void, siglongjmp, void *env, int val) {
+  __asan_handle_no_return();  // Runs before the jump is actually taken.
+  REAL(siglongjmp)(env, val);  // Forwards to the real siglongjmp.
+}
+#endif
+
+#if ASAN_HAS_EXCEPTIONS == 1
+#ifdef __APPLE__
+extern "C" void __cxa_throw(void *a, void *b, void *c);
+#endif  // __APPLE__
+
+INTERCEPTOR(void, __cxa_throw, void *a, void *b, void *c) {
+  CHECK(REAL(__cxa_throw));  // The real function must have been resolved.
+  __asan_handle_no_return();  // Runs before the real throw.
+  REAL(__cxa_throw)(a, b, c);  // Arguments are forwarded unchanged.
+}
+#endif
+
+// intercept mlock and friends. All functions return 0 (success).
+// Since asan maps 16T of RAM, mlock is completely unfriendly to asan.
+static void MlockIsUnsupported() {  // Prints the notice on the first call only.
+  static bool printed = false;
+  if (!printed) {
+    printed = true;
+    Printf("INFO: AddressSanitizer ignores mlock/mlockall/munlock/munlockall\n");
+  }
+}
+
+extern "C" {
+INTERCEPTOR_ATTRIBUTE
+int mlock(const void *addr, size_t len) {  // Both arguments are ignored.
+  MlockIsUnsupported();
+  return 0;  // Always reports success.
+}
+
+INTERCEPTOR_ATTRIBUTE
+int munlock(const void *addr, size_t len) {  // Both arguments are ignored.
+  MlockIsUnsupported();
+  return 0;  // Always reports success.
+}
+
+INTERCEPTOR_ATTRIBUTE
+int mlockall(int flags) {  // The argument is ignored.
+  MlockIsUnsupported();
+  return 0;  // Always reports success.
+}
+
+INTERCEPTOR_ATTRIBUTE
+int munlockall(void) {
+  MlockIsUnsupported();
+  return 0;  // Always reports success.
+}
+}  // extern "C"
+
+static inline int CharCmp(unsigned char c1, unsigned char c2) {
+  return (c1 > c2) - (c1 < c2);  // Yields -1, 0 or 1.
+}
+
+// Compares two characters ignoring ASCII case. Returns the difference of the
+// lowered values, i.e. negative, zero or positive (not just -1/0/1).
+static inline int CharCaseCmp(unsigned char c1, unsigned char c2) {
+  return ToLower(c1) - ToLower(c2);
+}
+
+INTERCEPTOR(int, memcmp, const void *a1, const void *a2, size_t size) {
+  ENSURE_ASAN_INITED();
+  const unsigned char *lhs = (const unsigned char*)a1;
+  const unsigned char *rhs = (const unsigned char*)a2;
+  unsigned char c1 = 0, c2 = 0;
+  size_t pos = 0;
+  for (; pos < size; pos++) {  // Stops at the first differing byte.
+    c1 = lhs[pos];
+    c2 = rhs[pos];
+    if (c1 != c2) break;
+  }
+  ASAN_READ_RANGE(lhs, Min(pos + 1, size));  // Only the bytes examined.
+  ASAN_READ_RANGE(rhs, Min(pos + 1, size));
+  return CharCmp(c1, c2);
+}
+
+INTERCEPTOR(void*, memcpy, void *to, const void *from, size_t size) {
+  // memcpy is called during __asan_init() from the internals of printf(...).
+  if (asan_init_is_running) {
+    return REAL(memcpy)(to, from, size);
+  }
+  ENSURE_ASAN_INITED();
+  if (FLAG_replace_intrin) {
+    if (to != from) {
+      // We do not treat memcpy with to==from as a bug.
+      // See http://llvm.org/bugs/show_bug.cgi?id=11763.
+      CHECK_RANGES_OVERLAP("memcpy", to, size, from, size);
+    }
+    // |from| is only read and |to| is only written (kind shows in the report).
+    ASAN_READ_RANGE(from, size);
+    ASAN_WRITE_RANGE(to, size);
+  }
+  return REAL(memcpy)(to, from, size);
+}
+
+INTERCEPTOR(void*, memmove, void *to, const void *from, size_t size) {
+  ENSURE_ASAN_INITED();
+  if (FLAG_replace_intrin) {
+    ASAN_READ_RANGE(from, size);  // The source is only read...
+    ASAN_WRITE_RANGE(to, size);   // ...and the destination only written.
+  }
+  return REAL(memmove)(to, from, size);
+}
+
+INTERCEPTOR(void*, memset, void *block, int c, size_t size) {
+  // memset is called inside Printf.
+  // Checks are skipped while __asan_init() is running.
+  if (!asan_init_is_running) {
+    ENSURE_ASAN_INITED();
+    if (FLAG_replace_intrin) {
+      ASAN_WRITE_RANGE(block, size);
+    }
+  }
+  return REAL(memset)(block, c, size);
+}
+
+INTERCEPTOR(char*, strchr, const char *str, int c) {
+  ENSURE_ASAN_INITED();
+  char *const found = REAL(strchr)(str, c);
+  if (!FLAG_replace_str) return found;
+  // Bytes up to and including the match (or the terminator) count as read.
+  const size_t last = found ? (size_t)(found - str) : REAL(strlen)(str);
+  ASAN_READ_RANGE(str, last + 1);
+  return found;
+}
+
+#ifdef __linux__
+INTERCEPTOR(char*, index, const char *string, int c)
+  ALIAS(WRAPPER_NAME(strchr));
+#else
+DEFINE_REAL(char*, index, const char *string, int c);
+#endif
+
+INTERCEPTOR(int, strcasecmp, const char *s1, const char *s2) {
+  ENSURE_ASAN_INITED();
+  size_t pos = 0;
+  int diff;
+  for (;; pos++) {  // Until the strings differ (ignoring case) or s1 ends.
+    const unsigned char c1 = (unsigned char)s1[pos];
+    diff = CharCaseCmp(c1, (unsigned char)s2[pos]);
+    if (diff != 0 || c1 == '\0') break;
+  }
+  ASAN_READ_RANGE(s1, pos + 1);
+  ASAN_READ_RANGE(s2, pos + 1);
+  return diff;
+}
+
+INTERCEPTOR(char*, strcat, char *to, const char *from) {  // NOLINT
+  ENSURE_ASAN_INITED();
+  if (FLAG_replace_str) {
+    size_t from_length = REAL(strlen)(from);
+    ASAN_READ_RANGE(from, from_length + 1);  // +1 covers the terminator.
+    if (from_length > 0) {  // An empty |from| triggers no checks of |to|.
+      size_t to_length = REAL(strlen)(to);
+      ASAN_READ_RANGE(to, to_length);
+      ASAN_WRITE_RANGE(to + to_length, from_length + 1);
+      CHECK_RANGES_OVERLAP("strcat", to, to_length + 1, from, from_length + 1);
+    }
+  }
+  return REAL(strcat)(to, from);  // NOLINT
+}
+
+INTERCEPTOR(int, strcmp, const char *s1, const char *s2) {
+  // Before ASan is initialized only the internal implementation is used.
+  if (!asan_inited) return internal_strcmp(s1, s2);
+  unsigned char c1, c2;
+  size_t pos = 0;
+  for (;; pos++) {
+    c1 = (unsigned char)s1[pos];
+    c2 = (unsigned char)s2[pos];
+    if (c1 != c2 || c1 == '\0') break;
+  }
+  // Everything up to and including the deciding character counts as read.
+  ASAN_READ_RANGE(s1, pos + 1);
+  ASAN_READ_RANGE(s2, pos + 1);
+  return CharCmp(c1, c2);
+}
+
+INTERCEPTOR(char*, strcpy, char *to, const char *from) {  // NOLINT
+  // strcpy is called from malloc_default_purgeable_zone()
+  // in __asan::ReplaceSystemAlloc() on Mac.
+  // While init is running the call goes straight to the real strcpy.
+  if (!asan_init_is_running) {
+    ENSURE_ASAN_INITED();
+    if (FLAG_replace_str) {
+      const size_t bytes = REAL(strlen)(from) + 1;  // Includes the terminator.
+      CHECK_RANGES_OVERLAP("strcpy", to, bytes, from, bytes);
+      ASAN_READ_RANGE(from, bytes);
+      ASAN_WRITE_RANGE(to, bytes);
+    }
+  }
+  return REAL(strcpy)(to, from);  // NOLINT
+}
+
+INTERCEPTOR(char*, strdup, const char *s) {
+  ENSURE_ASAN_INITED();
+  if (FLAG_replace_str) {
+    const size_t bytes = REAL(strlen)(s) + 1;  // Includes the terminator.
+    ASAN_READ_RANGE(s, bytes);
+  }
+  return REAL(strdup)(s);
+}
+
+INTERCEPTOR(size_t, strlen, const char *s) {
+  // strlen is called from malloc_default_purgeable_zone()
+  // in __asan::ReplaceSystemAlloc() on Mac, i.e. while init is running;
+  // in that case the call is forwarded without any checks.
+  const bool in_init = asan_init_is_running;
+  if (!in_init) ENSURE_ASAN_INITED();
+  const size_t length = REAL(strlen)(s);
+  if (!in_init && FLAG_replace_str) {
+    // The terminator is read as well, hence the + 1.
+    ASAN_READ_RANGE(s, length + 1);
+  }
+  return length;
+}
+
+INTERCEPTOR(int, strncasecmp, const char *s1, const char *s2, size_t n) {
+  ENSURE_ASAN_INITED();
+  int diff = 0;  // Result for n == 0 and for equal strings.
+  size_t pos = 0;
+  for (; pos < n; pos++) {
+    const unsigned char c1 = (unsigned char)s1[pos];
+    diff = CharCaseCmp(c1, (unsigned char)s2[pos]);
+    if (diff != 0 || c1 == '\0') break;
+  }
+  ASAN_READ_RANGE(s1, Min(pos + 1, n));
+  ASAN_READ_RANGE(s2, Min(pos + 1, n));
+  return diff;
+}
+
+INTERCEPTOR(int, strncmp, const char *s1, const char *s2, size_t size) {
+  // strncmp is called from malloc_default_purgeable_zone()
+  // in __asan::ReplaceSystemAlloc() on Mac.
+  if (asan_init_is_running) return REAL(strncmp)(s1, s2, size);
+  unsigned char c1 = 0, c2 = 0;
+  size_t pos = 0;
+  // Stop at the first difference, at the end of s1, or after size characters.
+  while (pos < size) {
+    c1 = (unsigned char)s1[pos];
+    c2 = (unsigned char)s2[pos];
+    if (c1 != c2 || c1 == '\0') break;
+    pos++;
+  }
+  ASAN_READ_RANGE(s1, Min(pos + 1, size));
+  ASAN_READ_RANGE(s2, Min(pos + 1, size));
+  return CharCmp(c1, c2);
+}
+
+INTERCEPTOR(char*, strncpy, char *to, const char *from, size_t size) {
+  ENSURE_ASAN_INITED();
+  if (FLAG_replace_str) {
+    size_t from_size = Min(size, internal_strnlen(from, size) + 1);  // <= size.
+    CHECK_RANGES_OVERLAP("strncpy", to, from_size, from, from_size);
+    ASAN_READ_RANGE(from, from_size);
+    ASAN_WRITE_RANGE(to, size);  // The whole destination range is checked.
+  }
+  return REAL(strncpy)(to, from, size);
+}
+
+#if ASAN_INTERCEPT_STRNLEN
+INTERCEPTOR(size_t, strnlen, const char *s, size_t maxlen) {
+  ENSURE_ASAN_INITED();
+  const size_t found = REAL(strnlen)(s, maxlen);
+  if (!FLAG_replace_str) return found;
+  // The terminator counts as read unless the scan stopped at maxlen.
+  ASAN_READ_RANGE(s, Min(found + 1, maxlen));
+  return found;
+}
+#endif  // ASAN_INTERCEPT_STRNLEN
+
+static inline bool IsValidStrtolBase(int base) {  // Accepts 0 and 2..36.
+  return base == 0 || (base >= 2 && base <= 36);
+}
+
+static inline void FixRealStrtolEndptr(const char *nptr, char **endptr) {
+  CHECK(endptr != NULL);
+  if (*endptr == nptr) {
+    // strtol found no digits and reported the start of the string. Recompute
+    // the last symbol it accessed: skip leading blanks and an optional sign.
+    const char *scan = nptr;
+    while (IsSpace(*scan)) scan++;
+    if (*scan == '+' || *scan == '-') scan++;
+    *endptr = (char*)scan;
+  }
+  CHECK(*endptr >= nptr);
+}
+
+INTERCEPTOR(long, strtol, const char *nptr,  // NOLINT
+            char **endptr, int base) {
+  ENSURE_ASAN_INITED();
+  // With an unsupported base strtol can fail with EINVAL without reading any
+  // characters and possibly without storing an end pointer, so such calls
+  // (like calls with checks disabled) are forwarded untouched. This also keeps
+  // an uninitialized real_endptr from being copied into *endptr.
+  if (!FLAG_replace_str || !IsValidStrtolBase(base)) {
+    return REAL(strtol)(nptr, endptr, base);
+  }
+  char *real_endptr;
+  long result = REAL(strtol)(nptr, &real_endptr, base);  // NOLINT
+  if (endptr != NULL) *endptr = real_endptr;
+  FixRealStrtolEndptr(nptr, &real_endptr);
+  ASAN_READ_RANGE(nptr, (real_endptr - nptr) + 1);
+  return result;
+}
+
+INTERCEPTOR(int, atoi, const char *nptr) {
+  ENSURE_ASAN_INITED();
+  if (!FLAG_replace_str) {
+    return REAL(atoi)(nptr);
+  }
+  char *real_endptr;
+  // "man atoi" tells that behavior of atoi(nptr) is the same as
+  // strtol(nptr, NULL, 10), i.e. it sets errno to ERANGE if the
+  // parsed integer can't be stored in *long* type (even if it's
+  // different from int). So, we just imitate this behavior.
+  int result = REAL(strtol)(nptr, &real_endptr, 10);  // long narrowed to int.
+  FixRealStrtolEndptr(nptr, &real_endptr);  // Handles the "no digits" case.
+  ASAN_READ_RANGE(nptr, (real_endptr - nptr) + 1);  // Up to the stop char.
+  return result;
+}
+
+INTERCEPTOR(long, atol, const char *nptr) {  // NOLINT
+  ENSURE_ASAN_INITED();
+  if (!FLAG_replace_str) return REAL(atol)(nptr);
+  // Parse via strtol to learn where parsing stopped, then check everything
+  // from nptr up to and including that character.
+  char *stop;
+  const long value = REAL(strtol)(nptr, &stop, 10);  // NOLINT
+  FixRealStrtolEndptr(nptr, &stop);
+  ASAN_READ_RANGE(nptr, (stop - nptr) + 1);
+  return value;
+}
+
+#if ASAN_INTERCEPT_ATOLL_AND_STRTOLL
+INTERCEPTOR(long long, strtoll, const char *nptr,  // NOLINT
+            char **endptr, int base) {
+  ENSURE_ASAN_INITED();
+  // If base has unsupported value, strtoll can exit with EINVAL
+  // without reading any characters (and possibly without storing an end
+  // pointer). Such calls, like calls with string checks disabled, are
+  // forwarded untouched, which also keeps an uninitialized real_endptr from
+  // being copied into *endptr.
+  if (!FLAG_replace_str || !IsValidStrtolBase(base)) {
+    return REAL(strtoll)(nptr, endptr, base);
+  }
+  char *real_endptr;
+  long long result = REAL(strtoll)(nptr, &real_endptr, base);  // NOLINT
+  if (endptr != NULL) {
+    *endptr = real_endptr;
+  }
+  FixRealStrtolEndptr(nptr, &real_endptr);
+  ASAN_READ_RANGE(nptr, (real_endptr - nptr) + 1);
+  return result;
+}
+
+INTERCEPTOR(long long, atoll, const char *nptr) {  // NOLINT
+  ENSURE_ASAN_INITED();
+  if (!FLAG_replace_str) return REAL(atoll)(nptr);
+  // Parse via strtoll to learn where parsing stopped, then check everything
+  // from nptr up to and including that character.
+  char *stop;
+  const long long value = REAL(strtoll)(nptr, &stop, 10);  // NOLINT
+  FixRealStrtolEndptr(nptr, &stop);
+  ASAN_READ_RANGE(nptr, (stop - nptr) + 1);
+  return value;
+}
+#endif  // ASAN_INTERCEPT_ATOLL_AND_STRTOLL
+
+#if defined(_WIN32)
+INTERCEPTOR_WINAPI(DWORD, CreateThread,
+                   void* security, size_t stack_size,
+                   DWORD (__stdcall *start_routine)(void*), void* arg,
+                   DWORD flags, void* tid) {
+  GET_STACK_TRACE_HERE(kStackTraceMax);  // Provides 'stack' used below.
+  int current_tid = asanThreadRegistry().GetCurrentTidOrMinusOne();
+  AsanThread *t = AsanThread::Create(current_tid, start_routine, arg, &stack);
+  asanThreadRegistry().RegisterThread(t);
+  return REAL(CreateThread)(security, stack_size,  // asan_thread_start and t
+                            asan_thread_start, t, flags, tid);  // replace both.
+}
+
+namespace __asan {
+void InitializeWindowsInterceptors() {  // Called from the _WIN32 init path.
+  CHECK(INTERCEPT_FUNCTION(CreateThread));  // Interception must succeed.
+}
+
+}  // namespace __asan
+#endif
+
+// ---------------------- InitializeAsanInterceptors ---------------- {{{1
+namespace __asan {
+void InitializeAsanInterceptors() {  // Installs all interceptors of this file.
+  static bool was_called_once;
+  CHECK(was_called_once == false);  // A second call is a CHECK failure.
+  was_called_once = true;
+  // Intercept mem* functions.
+  CHECK(INTERCEPT_FUNCTION(memcmp));
+  CHECK(INTERCEPT_FUNCTION(memmove));
+  CHECK(INTERCEPT_FUNCTION(memset));
+  if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) {
+    CHECK(INTERCEPT_FUNCTION(memcpy));
+  } else {
+    REAL(memcpy) = REAL(memmove);  // The real memcpy is the real memmove here.
+  }
+
+  // Intercept str* functions.
+  CHECK(INTERCEPT_FUNCTION(strcat));  // NOLINT
+  CHECK(INTERCEPT_FUNCTION(strchr));
+  CHECK(INTERCEPT_FUNCTION(strcmp));
+  CHECK(INTERCEPT_FUNCTION(strcpy));  // NOLINT
+  CHECK(INTERCEPT_FUNCTION(strlen));
+  CHECK(INTERCEPT_FUNCTION(strncmp));
+  CHECK(INTERCEPT_FUNCTION(strncpy));
+#if !defined(_WIN32)
+  CHECK(INTERCEPT_FUNCTION(strcasecmp));
+  CHECK(INTERCEPT_FUNCTION(strdup));
+  CHECK(INTERCEPT_FUNCTION(strncasecmp));
+# ifndef __APPLE__
+  CHECK(INTERCEPT_FUNCTION(index));
+# else
+  CHECK(OVERRIDE_FUNCTION(index, WRAP(strchr)));  // index uses strchr's wrapper.
+# endif
+#endif
+#if ASAN_INTERCEPT_STRNLEN
+  CHECK(INTERCEPT_FUNCTION(strnlen));
+#endif
+
+  CHECK(INTERCEPT_FUNCTION(atoi));
+  CHECK(INTERCEPT_FUNCTION(atol));
+  CHECK(INTERCEPT_FUNCTION(strtol));
+#if ASAN_INTERCEPT_ATOLL_AND_STRTOLL
+  CHECK(INTERCEPT_FUNCTION(atoll));
+  CHECK(INTERCEPT_FUNCTION(strtoll));
+#endif
+
+  // Intercept signal- and jump-related functions.
+  CHECK(INTERCEPT_FUNCTION(longjmp));
+#if ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
+  CHECK(INTERCEPT_FUNCTION(sigaction));
+  CHECK(INTERCEPT_FUNCTION(signal));
+#endif
+
+#if !defined(_WIN32)
+  CHECK(INTERCEPT_FUNCTION(_longjmp));
+  INTERCEPT_FUNCTION(__cxa_throw);  // Not CHECKed, unlike the others.
+# if !defined(__APPLE__)
+  // On Darwin siglongjmp tailcalls longjmp, so we don't want to intercept it
+  // there.
+  CHECK(INTERCEPT_FUNCTION(siglongjmp));
+# endif
+#endif
+
+  // Intercept threading-related functions
+#if !defined(_WIN32)
+  CHECK(INTERCEPT_FUNCTION(pthread_create));
+#endif
+
+  // Some Windows-specific interceptors.
+#if defined(_WIN32)
+  InitializeWindowsInterceptors();
+#endif
+
+  // Some Mac-specific interceptors.
+#if defined(__APPLE__)
+  InitializeMacInterceptors();
+#endif
+
+  if (FLAG_v > 0) {
+    Report("AddressSanitizer: libc interceptors initialized\n");
+  }
+}
+
+}  // namespace __asan
diff --git a/lib/asan/asan_interceptors.h b/lib/asan/asan_interceptors.h
new file mode 100644
index 0000000..e98514e
--- /dev/null
+++ b/lib/asan/asan_interceptors.h
@@ -0,0 +1,56 @@
+//===-- asan_interceptors.h -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan-private header for asan_interceptors.cc
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_INTERCEPTORS_H
+#define ASAN_INTERCEPTORS_H
+
+#include "asan_internal.h"
+#include "interception/interception.h"
+
+DECLARE_REAL(int, memcmp, const void *a1, const void *a2, size_t size);
+DECLARE_REAL(void*, memcpy, void *to, const void *from, size_t size);
+DECLARE_REAL(void*, memset, void *block, int c, size_t size);
+DECLARE_REAL(char*, strchr, const char *str, int c);
+DECLARE_REAL(size_t, strlen, const char *s);
+DECLARE_REAL(char*, strncpy, char *to, const char *from, size_t size);
+DECLARE_REAL(size_t, strnlen, const char *s, size_t maxlen);
+struct sigaction;
+DECLARE_REAL(int, sigaction, int signum, const struct sigaction *act,
+                             struct sigaction *oldact);
+
+namespace __asan {
+
+// __asan::internal_X() is the implementation of X() for use in RTL.
+int64_t internal_atoll(const char *nptr);
+size_t internal_strlen(const char *s);
+size_t internal_strnlen(const char *s, size_t maxlen);
+char* internal_strchr(const char *s, int c);
+void* internal_memchr(const void* s, int c, size_t n);
+void* internal_memset(void *s, int c, size_t n);
+int internal_memcmp(const void* s1, const void* s2, size_t n);
+char *internal_strstr(const char *haystack, const char *needle);
+char *internal_strncat(char *dst, const char *src, size_t n);
+int internal_strcmp(const char *s1, const char *s2);
+char *internal_strncpy(char *dst, const char *src, size_t n);
+// Works only for base=10 and doesn't set errno.
+int64_t internal_simple_strtoll(const char *nptr, char **endptr, int base);
+
+void InitializeAsanInterceptors();
+
+#if defined(__APPLE__)
+void InitializeMacInterceptors();
+#endif  // __APPLE__
+
+}  // namespace __asan
+
+#endif  // ASAN_INTERCEPTORS_H
diff --git a/lib/asan/asan_interface.h b/lib/asan/asan_interface.h
new file mode 100644
index 0000000..0f20a4c
--- /dev/null
+++ b/lib/asan/asan_interface.h
@@ -0,0 +1,169 @@
+//===-- asan_interface.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// This header can be included by the instrumented program to fetch
+// data (mostly allocator statistics) from ASan runtime library.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_INTERFACE_H
+#define ASAN_INTERFACE_H
+
+// ----------- ATTENTION -------------
+// This header should NOT include any other headers from ASan runtime.
+// All functions in this header are extern "C" and start with __asan_.
+
+#if !defined(_WIN32)
+#include <stdint.h>  // for uintptr_t
+#define ASAN_INTERFACE_FUNCTION_ATTRIBUTE __attribute__((visibility("default")))
+#else
+// TODO(timurrrr): find out what we need on Windows. __declspec(dllexport) ?
+#define ASAN_INTERFACE_FUNCTION_ATTRIBUTE
+#endif
+#include <stddef.h>  // for size_t
+
+extern "C" {
+  // This function should be called at the very beginning of the process,
+  // before any instrumented code is executed and before any call to malloc.
+  void __asan_init() ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  // This function should be called by the instrumented code.
+  // 'addr' is the address of a global variable called 'name' of 'size' bytes.
+  void __asan_register_global(uintptr_t addr, size_t size, const char *name)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  // This structure describes an instrumented global variable.
+  struct __asan_global {
+    size_t beg;                // The address of the global.
+    size_t size;               // The original size of the global.
+    size_t size_with_redzone;  // The size with the redzone.
+    const char *name;          // Name as a C string.
+  };
+
+  // These two functions should be called by the instrumented code.
+  // 'globals' is an array of structures describing 'n' globals.
+  void __asan_register_globals(__asan_global *globals, size_t n)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  void __asan_unregister_globals(__asan_global *globals, size_t n)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  // These two functions are used by the instrumented code in the
+  // use-after-return mode. __asan_stack_malloc allocates size bytes of
+  // fake stack and __asan_stack_free poisons it. real_stack is a pointer to
+  // the real stack region.
+  size_t __asan_stack_malloc(size_t size, size_t real_stack)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  void __asan_stack_free(size_t ptr, size_t size, size_t real_stack)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  // Marks memory region [addr, addr+size) as unaddressable.
+  // This memory must be previously allocated by the user program. Accessing
+  // addresses in this region from instrumented code is forbidden until
+  // this region is unpoisoned. This function is not guaranteed to poison
+  // the whole region - it may poison only subregion of [addr, addr+size) due
+  // to ASan alignment restrictions.
+  // Method is NOT thread-safe in the sense that no two threads can
+  // (un)poison memory in the same memory region simultaneously.
+  void __asan_poison_memory_region(void const volatile *addr, size_t size)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  // Marks memory region [addr, addr+size) as addressable.
+  // This memory must be previously allocated by the user program. Accessing
+  // addresses in this region is allowed until this region is poisoned again.
+  // This function may unpoison a superregion of [addr, addr+size) due to
+  // ASan alignment restrictions.
+  // Method is NOT thread-safe in the sense that no two threads can
+  // (un)poison memory in the same memory region simultaneously.
+  void __asan_unpoison_memory_region(void const volatile *addr, size_t size)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  // Performs cleanup before a NoReturn function. Must be called before things
+  // like _exit and execl to avoid false positives on stack.
+  void __asan_handle_no_return() ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+// User code should use the macros below instead of the functions.
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(address_sanitizer)
+#define ASAN_POISON_MEMORY_REGION(addr, size) \
+  __asan_poison_memory_region((addr), (size))
+#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
+  __asan_unpoison_memory_region((addr), (size))
+#else
+#define ASAN_POISON_MEMORY_REGION(addr, size) \
+  ((void)(addr), (void)(size))
+#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
+  ((void)(addr), (void)(size))
+#endif
+
+  // Returns true iff addr is poisoned (i.e. 1-byte read/write access to this
+  // address will result in error report from AddressSanitizer).
+  bool __asan_address_is_poisoned(void const volatile *addr)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  // This is an internal function that is called to report an error.
+  // However it is still a part of the interface because users may want to
+  // set a breakpoint on this function in a debugger.
+  void __asan_report_error(uintptr_t pc, uintptr_t bp, uintptr_t sp,
+                           uintptr_t addr, bool is_write, size_t access_size)
+    ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  // Sets the exit code to use when reporting an error.
+  // Returns the old value.
+  int __asan_set_error_exit_code(int exit_code)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  // Sets the callback to be called right before death on error.
+  // Passing NULL will unset the callback.
+  void __asan_set_death_callback(void (*callback)(void))
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  void __asan_set_error_report_callback(void (*callback)(const char*))
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+
+  // Returns the estimated number of bytes that will be reserved by allocator
+  // for request of "size" bytes. If ASan allocator can't allocate that much
+  // memory, returns the maximal possible allocation size, otherwise returns
+  // "size".
+  size_t __asan_get_estimated_allocated_size(size_t size)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  // Returns true if p was returned by the ASan allocator and
+  // is not yet freed.
+  bool __asan_get_ownership(const void *p)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  // Returns the number of bytes reserved for the pointer p.
+  // Requires (get_ownership(p) == true) or (p == NULL).
+  size_t __asan_get_allocated_size(const void *p)
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  // Number of bytes, allocated and not yet freed by the application.
+  size_t __asan_get_current_allocated_bytes()
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  // Number of bytes, mmaped by asan allocator to fulfill allocation requests.
+  // Generally, for request of X bytes, allocator can reserve and add to free
+  // lists a large number of chunks of size X to use them for future requests.
+  // All these chunks count toward the heap size. Currently, allocator never
+  // releases memory to OS (instead, it just puts freed chunks to free lists).
+  size_t __asan_get_heap_size()
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  // Number of bytes, mmaped by asan allocator, which can be used to fulfill
+  // allocation requests. When a user program frees memory chunk, it can first
+  // fall into quarantine and will count toward __asan_get_free_bytes() later.
+  size_t __asan_get_free_bytes()
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  // Number of bytes in unmapped pages, that are released to OS. Currently,
+  // always returns 0.
+  size_t __asan_get_unmapped_bytes()
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+  // Prints accumulated stats to stderr. Used for debugging.
+  void __asan_print_accumulated_stats()
+      ASAN_INTERFACE_FUNCTION_ATTRIBUTE;
+}  // extern "C"
+
+#undef ASAN_INTERFACE_FUNCTION_ATTRIBUTE
+#endif  // ASAN_INTERFACE_H
diff --git a/lib/asan/asan_internal.h b/lib/asan/asan_internal.h
new file mode 100644
index 0000000..df1814c
--- /dev/null
+++ b/lib/asan/asan_internal.h
@@ -0,0 +1,357 @@
+//===-- asan_internal.h -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan-private header which defines various general utilities.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_INTERNAL_H
+#define ASAN_INTERNAL_H
+
+#if !defined(__linux__) && !defined(__APPLE__) && !defined(_WIN32)
+# error "This operating system is not supported by AddressSanitizer"
+#endif
+
+#include <stddef.h>  // for size_t, uintptr_t, etc.
+
+#if defined(_WIN32)
+// There's no <stdint.h> in Visual Studio 9, so we have to define [u]int*_t.
+typedef unsigned __int8  uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+typedef __int8           int8_t;
+typedef __int16          int16_t;
+typedef __int32          int32_t;
+typedef __int64          int64_t;
+typedef unsigned long    DWORD;  // NOLINT
+
+extern "C" void* _ReturnAddress(void);
+# pragma intrinsic(_ReturnAddress)
+
+# define ALIAS(x)   // TODO(timurrrr): do we need this on Windows?
+# define ALIGNED(x) __declspec(align(x))
+# define NOINLINE __declspec(noinline)
+# define NORETURN __declspec(noreturn)
+
+# define ASAN_INTERFACE_ATTRIBUTE  // TODO(timurrrr): do we need this on Win?
+#else  // defined(_WIN32)
+# include <stdint.h>  // for __WORDSIZE
+
+# define ALIAS(x) __attribute__((alias(x)))
+# define ALIGNED(x) __attribute__((aligned(x)))
+# define NOINLINE __attribute__((noinline))
+# define NORETURN  __attribute__((noreturn))
+
+# define ASAN_INTERFACE_ATTRIBUTE __attribute__((visibility("default")))
+#endif  // defined(_WIN32)
+
+// If __WORDSIZE was undefined by the platform, define it in terms of the
+// compiler built-ins __LP64__ and _WIN64.
+#ifndef __WORDSIZE
+#if __LP64__ || defined(_WIN64)
+#define __WORDSIZE 64
+#else
+#define __WORDSIZE 32
+#endif
+#endif
+
+// Limits for integral types. We have to redefine it in case we don't
+// have stdint.h (like in Visual Studio 9).
+#if __WORDSIZE == 64
+# define __INT64_C(c)  c ## L
+# define __UINT64_C(c) c ## UL
+#else
+# define __INT64_C(c)  c ## LL
+# define __UINT64_C(c) c ## ULL
+#endif  // __WORDSIZE == 64
+#undef INT32_MIN
+#define INT32_MIN              (-2147483647-1)
+#undef INT32_MAX
+#define INT32_MAX              (2147483647)
+#undef UINT32_MAX
+#define UINT32_MAX             (4294967295U)
+#undef INT64_MIN
+#define INT64_MIN              (-__INT64_C(9223372036854775807)-1)
+#undef INT64_MAX
+#define INT64_MAX              (__INT64_C(9223372036854775807))
+#undef UINT64_MAX
+#define UINT64_MAX             (__UINT64_C(18446744073709551615))
+
+#define ASAN_DEFAULT_FAILURE_EXITCODE 1
+
+#if defined(__linux__)
+# define ASAN_LINUX   1
+#else
+# define ASAN_LINUX   0
+#endif
+
+#if defined(__APPLE__)
+# define ASAN_MAC     1
+#else
+# define ASAN_MAC     0
+#endif
+
+#if defined(_WIN32)
+# define ASAN_WINDOWS 1
+#else
+# define ASAN_WINDOWS 0
+#endif
+
+#define ASAN_POSIX (ASAN_LINUX || ASAN_MAC)
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+
+#if __has_feature(address_sanitizer)
+# error "The AddressSanitizer run-time should not be"
+        " instrumented by AddressSanitizer"
+#endif
+
+// Build-time configuration options.
+
+// If set, asan will install its own SEGV signal handler.
+#ifndef ASAN_NEEDS_SEGV
+# define ASAN_NEEDS_SEGV 1
+#endif
+
+// If set, asan will intercept C++ exception api call(s).
+#ifndef ASAN_HAS_EXCEPTIONS
+# define ASAN_HAS_EXCEPTIONS 1
+#endif
+
+// If set, asan uses the values of SHADOW_SCALE and SHADOW_OFFSET
+// provided by the instrumented objects. Otherwise constants are used.
+#ifndef ASAN_FLEXIBLE_MAPPING_AND_OFFSET
+# define ASAN_FLEXIBLE_MAPPING_AND_OFFSET 0
+#endif
+
+// If set, values like allocator chunk size, as well as defaults for some flags
+// will be changed towards less memory overhead.
+#ifndef ASAN_LOW_MEMORY
+# define ASAN_LOW_MEMORY 0
+#endif
+
+// All internal functions in asan reside inside the __asan namespace
+// to avoid namespace collisions with the user programs.
+// Separate namespace also makes it simpler to distinguish the asan run-time
+// functions from the instrumented user code in a profile.
+namespace __asan {
+
+class AsanThread;
+struct AsanStackTrace;
+
+// asan_rtl.cc
+void NORETURN CheckFailed(const char *cond, const char *file, int line);
+void NORETURN ShowStatsAndAbort();
+
+// asan_globals.cc
+bool DescribeAddrIfGlobal(uintptr_t addr);
+
+void ReplaceOperatorsNewAndDelete();
+// asan_malloc_linux.cc / asan_malloc_mac.cc
+void ReplaceSystemMalloc();
+
+void OutOfMemoryMessageAndDie(const char *mem_type, size_t size);
+
+// asan_linux.cc / asan_mac.cc / asan_win.cc
+void *AsanDoesNotSupportStaticLinkage();
+bool AsanShadowRangeIsAvailable();
+int AsanOpenReadonly(const char* filename);
+const char *AsanGetEnv(const char *name);
+void AsanDumpProcessMap();
+
+void *AsanMmapFixedNoReserve(uintptr_t fixed_addr, size_t size);
+void *AsanMmapFixedReserve(uintptr_t fixed_addr, size_t size);
+void *AsanMprotect(uintptr_t fixed_addr, size_t size);
+void *AsanMmapSomewhereOrDie(size_t size, const char *where);
+void AsanUnmapOrDie(void *ptr, size_t size);
+
+void AsanDisableCoreDumper();
+void GetPcSpBp(void *context, uintptr_t *pc, uintptr_t *sp, uintptr_t *bp);
+
+size_t AsanRead(int fd, void *buf, size_t count);
+size_t AsanWrite(int fd, const void *buf, size_t count);
+int AsanClose(int fd);
+
+bool AsanInterceptsSignal(int signum);
+void SetAlternateSignalStack();
+void UnsetAlternateSignalStack();
+void InstallSignalHandlers();
+int GetPid();
+uintptr_t GetThreadSelf();
+int AtomicInc(int *a);
+uint16_t AtomicExchange(uint16_t *a, uint16_t new_val);
+
+// Wrapper for TLS/TSD.
+void AsanTSDInit(void (*destructor)(void *tsd));
+void *AsanTSDGet();
+void AsanTSDSet(void *tsd);
+
+// Opens the file 'file_name' and reads up to 'max_len' bytes.
+// The resulting buffer is mmaped and stored in '*buff'.
+// The size of the mmaped region is stored in '*buff_size'.
+// Returns the number of read bytes or 0 if file can not be opened.
+size_t ReadFileToBuffer(const char *file_name, char **buff,
+                        size_t *buff_size, size_t max_len);
+
+// asan_printf.cc
+void RawWrite(const char *buffer);
+int SNPrintf(char *buffer, size_t length, const char *format, ...);
+void Printf(const char *format, ...);
+int SScanf(const char *str, const char *format, ...);
+void Report(const char *format, ...);
+
+// Don't use std::min and std::max, to minimize dependency on libstdc++.
+template<class T> T Min(T a, T b) { return a < b ? a : b; }
+template<class T> T Max(T a, T b) { return a > b ? a : b; }
+
+void SortArray(uintptr_t *array, size_t size);
+
+// asan_poisoning.cc
+// Poisons the shadow memory for "size" bytes starting from "addr".
+void PoisonShadow(uintptr_t addr, size_t size, uint8_t value);
+// Poisons the shadow memory for "redzone_size" bytes starting from
+// "addr + size".
+void PoisonShadowPartialRightRedzone(uintptr_t addr,
+                                     uintptr_t size,
+                                     uintptr_t redzone_size,
+                                     uint8_t value);
+
+// Platform-specific options.
+#ifdef __APPLE__
+bool PlatformHasDifferentMemcpyAndMemmove();
+# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE \
+    (PlatformHasDifferentMemcpyAndMemmove())
+#else
+# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE true
+#endif  // __APPLE__
+
+extern size_t FLAG_quarantine_size;
+extern int    FLAG_demangle;
+extern bool   FLAG_symbolize;
+extern int    FLAG_v;
+extern size_t FLAG_redzone;
+extern int    FLAG_debug;
+extern bool   FLAG_poison_shadow;
+extern int    FLAG_report_globals;
+extern size_t FLAG_malloc_context_size;
+extern bool   FLAG_replace_str;
+extern bool   FLAG_replace_intrin;
+extern bool   FLAG_replace_cfallocator;
+extern bool   FLAG_fast_unwind;
+extern bool   FLAG_use_fake_stack;
+extern size_t FLAG_max_malloc_fill_size;
+extern int    FLAG_exitcode;
+extern bool   FLAG_allow_user_poisoning;
+extern int    FLAG_sleep_before_dying;
+extern bool   FLAG_handle_segv;
+extern bool   FLAG_use_sigaltstack;
+
+extern int asan_inited;
+// Used to avoid infinite recursion in __asan_init().
+extern bool asan_init_is_running;
+
+enum LinkerInitialized { LINKER_INITIALIZED = 0 };
+
+void NORETURN AsanDie();
+void SleepForSeconds(int seconds);
+void NORETURN Exit(int exitcode);
+void NORETURN Abort();
+int Atexit(void (*function)(void));
+
+#define CHECK(cond) do { if (!(cond)) { \
+  CheckFailed(#cond, __FILE__, __LINE__); \
+}}while(0)
+
+#define RAW_CHECK_MSG(expr, msg) do { \
+  if (!(expr)) { \
+    RawWrite(msg); \
+    AsanDie(); \
+  } \
+} while (0)
+
+#define RAW_CHECK(expr) RAW_CHECK_MSG(expr, #expr)
+
+#define UNIMPLEMENTED() CHECK("unimplemented" && 0)
+
+#define ASAN_ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
+
+const size_t kWordSize = __WORDSIZE / 8;
+const size_t kWordSizeInBits = 8 * kWordSize;
+const size_t kPageSizeBits = 12;
+const size_t kPageSize = 1UL << kPageSizeBits;
+
+#ifndef _WIN32
+const size_t kMmapGranularity = kPageSize;
+# define GET_CALLER_PC() (uintptr_t)__builtin_return_address(0)
+# define GET_CURRENT_FRAME() (uintptr_t)__builtin_frame_address(0)
+# define THREAD_CALLING_CONV
+typedef void* thread_return_t;
+#else
+const size_t kMmapGranularity = 1UL << 16;
+# define GET_CALLER_PC() (uintptr_t)_ReturnAddress()
+// CaptureStackBackTrace doesn't need to know BP on Windows.
+// FIXME: This macro is still used when printing error reports though it's not
+// clear if the BP value is needed in the ASan reports on Windows.
+# define GET_CURRENT_FRAME() (uintptr_t)0xDEADBEEF
+# define THREAD_CALLING_CONV __stdcall
+typedef DWORD thread_return_t;
+
+# ifndef ASAN_USE_EXTERNAL_SYMBOLIZER
+#  define ASAN_USE_EXTERNAL_SYMBOLIZER __asan::WinSymbolize
+bool WinSymbolize(const void *addr, char *out_buffer, int buffer_size);
+# endif
+#endif
+
+typedef thread_return_t (THREAD_CALLING_CONV *thread_callback_t)(void* arg);
+
+// These magic values are written to shadow for better error reporting.
+const int kAsanHeapLeftRedzoneMagic = 0xfa;
+const int kAsanHeapRightRedzoneMagic = 0xfb;
+const int kAsanHeapFreeMagic = 0xfd;
+const int kAsanStackLeftRedzoneMagic = 0xf1;
+const int kAsanStackMidRedzoneMagic = 0xf2;
+const int kAsanStackRightRedzoneMagic = 0xf3;
+const int kAsanStackPartialRedzoneMagic = 0xf4;
+const int kAsanStackAfterReturnMagic = 0xf5;
+const int kAsanUserPoisonedMemoryMagic = 0xf7;
+const int kAsanGlobalRedzoneMagic = 0xf9;
+const int kAsanInternalHeapMagic = 0xfe;
+
+static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
+static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
+
+// --------------------------- Bit twiddling ------- {{{1
+inline bool IsPowerOfTwo(size_t x) {
+  return (x & (x - 1)) == 0;
+}
+
+inline size_t RoundUpTo(size_t size, size_t boundary) {
+  CHECK(IsPowerOfTwo(boundary));
+  return (size + boundary - 1) & ~(boundary - 1);
+}
+
+// -------------------------- LowLevelAllocator ----- {{{1
+// A simple low-level memory allocator for internal use.
+class LowLevelAllocator {
+ public:
+  explicit LowLevelAllocator(LinkerInitialized) {}
+  // 'size' must be a power of two.
+  // Requires an external lock.
+  void *Allocate(size_t size);
+ private:
+  char *allocated_end_;
+  char *allocated_current_;
+};
+
+}  // namespace __asan
+
+#endif  // ASAN_INTERNAL_H
diff --git a/lib/asan/asan_linux.cc b/lib/asan/asan_linux.cc
new file mode 100644
index 0000000..fb2e21d
--- /dev/null
+++ b/lib/asan/asan_linux.cc
@@ -0,0 +1,388 @@
+//===-- asan_linux.cc -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Linux-specific details.
+//===----------------------------------------------------------------------===//
+#ifdef __linux__
+
+#include "asan_interceptors.h"
+#include "asan_internal.h"
+#include "asan_lock.h"
+#include "asan_procmaps.h"
+#include "asan_thread.h"
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <unwind.h>
+
+#ifndef ANDROID
+// FIXME: where to get ucontext on Android?
+#include <sys/ucontext.h>
+#endif
+
+extern "C" void* _DYNAMIC;
+
+namespace __asan {
+
+void *AsanDoesNotSupportStaticLinkage() {
+  // This will fail to link with -static.
+  return &_DYNAMIC;  // defined in link.h
+}
+
+bool AsanShadowRangeIsAvailable() {
+  // FIXME: shall we need anything here on Linux?
+  return true;
+}
+
+void GetPcSpBp(void *context, uintptr_t *pc, uintptr_t *sp, uintptr_t *bp) {
+#ifdef ANDROID
+  *pc = *sp = *bp = 0;
+#elif defined(__arm__)
+  ucontext_t *ucontext = (ucontext_t*)context;
+  *pc = ucontext->uc_mcontext.arm_pc;
+  *bp = ucontext->uc_mcontext.arm_fp;
+  *sp = ucontext->uc_mcontext.arm_sp;
+# elif defined(__x86_64__)
+  ucontext_t *ucontext = (ucontext_t*)context;
+  *pc = ucontext->uc_mcontext.gregs[REG_RIP];
+  *bp = ucontext->uc_mcontext.gregs[REG_RBP];
+  *sp = ucontext->uc_mcontext.gregs[REG_RSP];
+# elif defined(__i386__)
+  ucontext_t *ucontext = (ucontext_t*)context;
+  *pc = ucontext->uc_mcontext.gregs[REG_EIP];
+  *bp = ucontext->uc_mcontext.gregs[REG_EBP];
+  *sp = ucontext->uc_mcontext.gregs[REG_ESP];
+#else
+# error "Unsupported arch"
+#endif
+}
+
+bool AsanInterceptsSignal(int signum) {
+  return signum == SIGSEGV && FLAG_handle_segv;
+}
+
+static void *asan_mmap(void *addr, size_t length, int prot, int flags,
+                int fd, uint64_t offset) {
+# if __WORDSIZE == 64
+  return (void *)syscall(__NR_mmap, addr, length, prot, flags, fd, offset);
+# else
+  return (void *)syscall(__NR_mmap2, addr, length, prot, flags, fd, offset);
+# endif
+}
+
+void *AsanMmapSomewhereOrDie(size_t size, const char *mem_type) {
+  // Map whole pages; the rounded size is also what gets reported on failure.
+  size = RoundUpTo(size, kPageSize);
+  const int prot = PROT_READ | PROT_WRITE;
+  const int flags = MAP_PRIVATE | MAP_ANON;
+  void *mapping = asan_mmap(0, size, prot, flags, -1, 0);
+  if (mapping == (void*)-1)
+    OutOfMemoryMessageAndDie(mem_type, size);
+  return mapping;
+}
+
+void *AsanMmapFixedNoReserve(uintptr_t fixed_addr, size_t size) {
+  return asan_mmap((void*)fixed_addr, size,
+                   PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE,
+                   -1, 0);  // MAP_ANON: fd must be -1 for portability.
+}
+
+void *AsanMprotect(uintptr_t fixed_addr, size_t size) {
+  return asan_mmap((void*)fixed_addr, size,
+                   PROT_NONE,
+                   MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE,
+                   -1, 0);  // MAP_ANON: fd must be -1 for portability.
+}
+
+void AsanUnmapOrDie(void *addr, size_t size) {
+  if (!addr || !size) return;
+  int res = syscall(__NR_munmap, addr, size);
+  if (res != 0) {
+    Report("Failed to unmap\n");
+    AsanDie();
+  }
+}
+
+size_t AsanWrite(int fd, const void *buf, size_t count) {
+  return (size_t)syscall(__NR_write, fd, buf, count);
+}
+
+int AsanOpenReadonly(const char* filename) {
+  return syscall(__NR_open, filename, O_RDONLY);
+}
+
+// Like getenv, but reads env directly from /proc and does not use libc.
+// This function should be called first inside __asan_init.
+const char* AsanGetEnv(const char* name) {
+  static char *environ;
+  static size_t len;
+  static bool inited;
+  if (!inited) {
+    inited = true;
+    size_t environ_size;
+    len = ReadFileToBuffer("/proc/self/environ",
+                           &environ, &environ_size, 1 << 26);
+  }
+  if (!environ || len == 0) return NULL;
+  size_t namelen = internal_strlen(name);
+  const char *p = environ;
+  while (*p != '\0') {  // will happen at the \0\0 that terminates the buffer
+    // proc file has the format NAME=value\0NAME=value\0NAME=value\0...
+    const char* endp =
+        (char*)internal_memchr(p, '\0', len - (p - environ));
+    if (endp == NULL)  // this entry isn't NUL terminated
+      return NULL;
+    else if (!internal_memcmp(p, name, namelen) && p[namelen] == '=')  // Match.
+      return p + namelen + 1;  // point after =
+    p = endp + 1;
+  }
+  return NULL;  // Not found.
+}
+
+size_t AsanRead(int fd, void *buf, size_t count) {
+  return (size_t)syscall(__NR_read, fd, buf, count);
+}
+
+int AsanClose(int fd) {
+  return syscall(__NR_close, fd);
+}
+
+AsanProcMaps::AsanProcMaps() {
+  proc_self_maps_buff_len_ =
+      ReadFileToBuffer("/proc/self/maps", &proc_self_maps_buff_,
+                       &proc_self_maps_buff_mmaped_size_, 1 << 26);
+  CHECK(proc_self_maps_buff_len_ > 0);
+  // AsanWrite(2, proc_self_maps_buff_, proc_self_maps_buff_len_);
+  Reset();
+}
+
+AsanProcMaps::~AsanProcMaps() {
+  AsanUnmapOrDie(proc_self_maps_buff_, proc_self_maps_buff_mmaped_size_);
+}
+
+void AsanProcMaps::Reset() {
+  current_ = proc_self_maps_buff_;
+}
+
+bool AsanProcMaps::Next(uintptr_t *start, uintptr_t *end,
+                        uintptr_t *offset, char filename[],
+                        size_t filename_size) {
+  char *last = proc_self_maps_buff_ + proc_self_maps_buff_len_;
+  if (current_ >= last) return false;
+  int consumed = 0;
+  char flags[10];
+  int major, minor;
+  uintptr_t inode;
+  char *next_line = (char*)internal_memchr(current_, '\n', last - current_);
+  if (next_line == NULL)
+    next_line = last;
+  if (SScanf(current_,
+             "%lx-%lx %4s %lx %x:%x %ld %n",
+             start, end, flags, offset, &major, &minor,
+             &inode, &consumed) != 7)
+    return false;
+  current_ += consumed;
+  // Skip spaces.
+  while (current_ < next_line && *current_ == ' ')
+    current_++;
+  // Fill in the filename, always leaving room for the trailing NUL.
+  size_t i = 0;
+  while (current_ < next_line) {
+    if (filename && i + 1 < filename_size)  // No underflow if size is 0.
+      filename[i++] = *current_;
+    current_++;
+  }
+  if (filename && i < filename_size)
+    filename[i] = 0;
+  current_ = next_line + 1;
+  return true;
+}
+
+#if 1
+
+// Gets the object name and the offset by walking AsanProcMaps.
+bool AsanProcMaps::GetObjectNameAndOffset(uintptr_t addr, uintptr_t *offset,
+                                          char filename[],
+                                          size_t filename_size) {
+  return IterateForObjectNameAndOffset(addr, offset, filename, filename_size);
+}
+
+#else
+// dl_iterate_phdr machinery is not working well for us.
+// We either need to fix it or get rid of it.
+struct DlIterateData {
+  int count;
+  uintptr_t addr;
+  uintptr_t offset;
+  char *filename;
+  size_t filename_size;
+};
+
+static int dl_iterate_phdr_callback(struct dl_phdr_info *info,
+                                    size_t size, void *raw_data) {
+  DlIterateData *data = (DlIterateData*)raw_data;
+  int count = data->count++;
+  if (info->dlpi_addr > data->addr)
+    return 0;
+  if (count == 0) {
+    // The first item (the main executable) does not have a so name,
+    // but we can just read it from /proc/self/exe.
+    ssize_t path_len = readlink("/proc/self/exe",
+                                data->filename, data->filename_size - 1);
+    data->filename[path_len < 0 ? 0 : path_len] = 0;  // readlink may fail.
+  } else {
+    CHECK(info->dlpi_name);
+    REAL(strncpy)(data->filename, info->dlpi_name, data->filename_size);
+  }
+  data->offset = data->addr - info->dlpi_addr;
+  return 1;
+}
+
+// Gets the object name and the offset using dl_iterate_phdr.
+bool AsanProcMaps::GetObjectNameAndOffset(uintptr_t addr, uintptr_t *offset,
+                                          char filename[],
+                                          size_t filename_size) {
+  DlIterateData data;
+  data.count = 0;
+  data.addr = addr;
+  data.filename = filename;
+  data.filename_size = filename_size;
+  if (dl_iterate_phdr(dl_iterate_phdr_callback, &data)) {
+    *offset = data.offset;
+    return true;
+  }
+  return false;
+}
+
+#endif  // #if 1
+
+void AsanThread::SetThreadStackTopAndBottom() {
+  if (tid() == 0) {
+    // This is the main thread. Libpthread may not be initialized yet.
+    struct rlimit rl;
+    CHECK(getrlimit(RLIMIT_STACK, &rl) == 0);
+
+    // Find the mapping that contains a stack variable.
+    AsanProcMaps proc_maps;
+    uintptr_t start, end, offset;
+    uintptr_t prev_end = 0;
+    while (proc_maps.Next(&start, &end, &offset, NULL, 0)) {
+      if ((uintptr_t)&rl < end)
+        break;
+      prev_end = end;
+    }
+    CHECK((uintptr_t)&rl >= start && (uintptr_t)&rl < end);
+
+    // Get stacksize from rlimit, but clip it so that it does not overlap
+    // with other mappings.
+    size_t stacksize = rl.rlim_cur;
+    if (stacksize > end - prev_end)
+      stacksize = end - prev_end;
+    if (stacksize > kMaxThreadStackSize)
+      stacksize = kMaxThreadStackSize;
+    stack_top_ = end;
+    stack_bottom_ = end - stacksize;
+    CHECK(AddrIsInStack((uintptr_t)&rl));
+    return;
+  }
+  pthread_attr_t attr;
+  CHECK(pthread_getattr_np(pthread_self(), &attr) == 0);
+  size_t stacksize = 0;
+  void *stackaddr = NULL;
+  pthread_attr_getstack(&attr, &stackaddr, &stacksize);
+  pthread_attr_destroy(&attr);
+
+  stack_top_ = (uintptr_t)stackaddr + stacksize;
+  stack_bottom_ = (uintptr_t)stackaddr;
+  // When running with unlimited stack size, we still want to set some limit.
+  // The unlimited stack size is caused by 'ulimit -s unlimited'.
+  // Also, for some reason, GNU make spawns subprocesses with unlimited stack.
+  if (stacksize > kMaxThreadStackSize) {
+    stack_bottom_ = stack_top_ - kMaxThreadStackSize;
+  }
+  CHECK(AddrIsInStack((uintptr_t)&attr));
+}
+
+AsanLock::AsanLock(LinkerInitialized) {
+  // We assume that pthread_mutex_t initialized to all zeroes is a valid
+  // unlocked mutex. We can not use PTHREAD_MUTEX_INITIALIZER as it triggers
+  // a gcc warning:
+  // extended initializer lists only available with -std=c++0x or -std=gnu++0x
+}
+
+void AsanLock::Lock() {
+  CHECK(sizeof(pthread_mutex_t) <= sizeof(opaque_storage_));
+  pthread_mutex_lock((pthread_mutex_t*)&opaque_storage_);
+  CHECK(!owner_);
+  owner_ = (uintptr_t)pthread_self();
+}
+
+void AsanLock::Unlock() {
+  CHECK(owner_ == (uintptr_t)pthread_self());
+  owner_ = 0;
+  pthread_mutex_unlock((pthread_mutex_t*)&opaque_storage_);
+}
+
+#ifdef __arm__
+#define UNWIND_STOP _URC_END_OF_STACK
+#define UNWIND_CONTINUE _URC_NO_REASON
+#else
+#define UNWIND_STOP _URC_NORMAL_STOP
+#define UNWIND_CONTINUE _URC_NO_REASON
+#endif
+
+uintptr_t Unwind_GetIP(struct _Unwind_Context *ctx) {
+#ifdef __arm__
+  uintptr_t val;
+  _Unwind_VRS_Result res = _Unwind_VRS_Get(ctx, _UVRSC_CORE,
+      15 /* r15 = PC */, _UVRSD_UINT32, &val);
+  CHECK(res == _UVRSR_OK && "_Unwind_VRS_Get failed");
+  // Clear the Thumb bit.
+  return val & ~(uintptr_t)1;
+#else
+  return _Unwind_GetIP(ctx);
+#endif
+}
+
+_Unwind_Reason_Code Unwind_Trace(struct _Unwind_Context *ctx,
+    void *param) {
+  // 'param' is the AsanStackTrace that receives the unwound PCs.
+  AsanStackTrace *stack = (AsanStackTrace*)param;
+  CHECK(stack->size < stack->max_size);
+  stack->trace[stack->size++] = Unwind_GetIP(ctx);
+  // Stop unwinding as soon as the trace buffer is full.
+  return stack->size == stack->max_size ? UNWIND_STOP : UNWIND_CONTINUE;
+}
+
+void AsanStackTrace::GetStackTrace(size_t max_s, uintptr_t pc, uintptr_t bp) {
+  size = 0;
+  trace[0] = pc;
+  // With room for at most one frame there is nothing to unwind.
+  if (max_s <= 1) return;
+  max_size = max_s;
+#ifdef __arm__
+  _Unwind_Backtrace(Unwind_Trace, this);
+#else
+  FastUnwindStack(pc, bp);
+#endif
+}
+
+}  // namespace __asan
+
+#endif  // __linux__
diff --git a/lib/asan/asan_lock.h b/lib/asan/asan_lock.h
new file mode 100644
index 0000000..75da8ae
--- /dev/null
+++ b/lib/asan/asan_lock.h
@@ -0,0 +1,51 @@
+//===-- asan_lock.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// A wrapper for a simple lock.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_LOCK_H
+#define ASAN_LOCK_H
+
+#include "asan_internal.h"
+
+// The locks in ASan are global objects and they are never destroyed to avoid
+// at-exit races (that is, a lock is being used by other threads while the main
+// thread is doing atexit destructors).
+// We define the class using opaque storage to avoid including system headers.
+
+namespace __asan {
+
+class AsanLock {
+ public:
+  explicit AsanLock(LinkerInitialized);
+  void Lock();
+  void Unlock();
+  bool IsLocked() { return owner_ != 0; }
+ private:
+  uintptr_t opaque_storage_[10];
+  uintptr_t owner_;  // for debugging and for malloc_introspection_t interface
+};
+
+class ScopedLock {
+ public:
+  explicit ScopedLock(AsanLock *mu) : mu_(mu) {
+    mu_->Lock();
+  }
+  ~ScopedLock() {
+    mu_->Unlock();
+  }
+ private:
+  AsanLock *mu_;
+};
+
+}  // namespace __asan
+
+#endif  // ASAN_LOCK_H
diff --git a/lib/asan/asan_mac.cc b/lib/asan/asan_mac.cc
new file mode 100644
index 0000000..b68b0ed
--- /dev/null
+++ b/lib/asan/asan_mac.cc
@@ -0,0 +1,685 @@
+//===-- asan_mac.cc -------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Mac-specific details.
+//===----------------------------------------------------------------------===//
+
+#ifdef __APPLE__
+
+#include "asan_interceptors.h"
+#include "asan_internal.h"
+#include "asan_mapping.h"
+#include "asan_procmaps.h"
+#include "asan_stack.h"
+#include "asan_thread.h"
+#include "asan_thread_registry.h"
+
+#include <crt_externs.h>  // for _NSGetEnviron
+#include <mach-o/dyld.h>
+#include <mach-o/loader.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/sysctl.h>
+#include <sys/ucontext.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libkern/OSAtomic.h>
+#include <CoreFoundation/CFString.h>
+
+namespace __asan {
+
+void GetPcSpBp(void *context, uintptr_t *pc, uintptr_t *sp, uintptr_t *bp) {
+  ucontext_t *ucontext = (ucontext_t*)context;
+# if __WORDSIZE == 64
+  *pc = ucontext->uc_mcontext->__ss.__rip;
+  *bp = ucontext->uc_mcontext->__ss.__rbp;
+  *sp = ucontext->uc_mcontext->__ss.__rsp;
+# else
+  *pc = ucontext->uc_mcontext->__ss.__eip;
+  *bp = ucontext->uc_mcontext->__ss.__ebp;
+  *sp = ucontext->uc_mcontext->__ss.__esp;
+# endif  // __WORDSIZE
+}
+
+enum {
+  MACOS_VERSION_UNKNOWN = 0,
+  MACOS_VERSION_LEOPARD,
+  MACOS_VERSION_SNOW_LEOPARD,
+  MACOS_VERSION_LION,
+};
+
+static int GetMacosVersion() {
+  int mib[2] = { CTL_KERN, KERN_OSRELEASE };
+  char version[100];
+  size_t len = 0, maxlen = sizeof(version) / sizeof(version[0]);
+  for (size_t i = 0; i < maxlen; i++) version[i] = '\0';
+  // Query the required length first, then the kernel release string itself.
+  CHECK(sysctl(mib, 2, NULL, &len, NULL, 0) != -1);
+  CHECK(len < maxlen);
+  CHECK(sysctl(mib, 2, version, &len, NULL, 0) != -1);
+  switch (version[0]) {  // Dispatch on the leading characters of the release.
+    case '9': return MACOS_VERSION_LEOPARD;
+    case '1': {
+      switch (version[1]) {
+        case '0': return MACOS_VERSION_SNOW_LEOPARD;
+        case '1': return MACOS_VERSION_LION;
+        default: return MACOS_VERSION_UNKNOWN;
+      }
+    }
+    default: return MACOS_VERSION_UNKNOWN;
+  }
+}
+
+bool PlatformHasDifferentMemcpyAndMemmove() {
+  // On OS X 10.7 memcpy() and memmove() are both resolved
+  // into memmove$VARIANT$sse42.
+  // See also http://code.google.com/p/address-sanitizer/issues/detail?id=34.
+  // TODO(glider): need to check dynamically that memcpy() and memmove() are
+  // actually the same function.
+  return GetMacosVersion() == MACOS_VERSION_SNOW_LEOPARD;
+}
+
+// No-op. Mac does not support static linkage anyway.
+void *AsanDoesNotSupportStaticLinkage() {
+  return NULL;
+}
+
+static inline bool IntervalsAreSeparate(uintptr_t start1, uintptr_t end1,
+                                        uintptr_t start2, uintptr_t end2) {
+  CHECK(start1 <= end1);
+  CHECK(start2 <= end2);
+  return (end1 < start2) || (end2 < start1);
+}
+
+// FIXME: this is thread-unsafe, but should not cause problems most of the time.
+// When the shadow is mapped only a single thread usually exists (plus maybe
+// several worker threads on Mac, which aren't expected to map big chunks of
+// memory).
+bool AsanShadowRangeIsAvailable() {
+  // Scan every mapped segment; the shadow range (including the granularity
+  // gap below kLowShadowBeg) is usable only if none of them overlaps it.
+  const uintptr_t shadow_beg = kLowShadowBeg - kMmapGranularity;
+  const uintptr_t shadow_end = kHighShadowEnd;
+  AsanProcMaps procmaps;
+  uintptr_t start, end;
+  while (procmaps.Next(&start, &end,
+                       /*offset*/NULL, /*filename*/NULL, /*filename_size*/0)) {
+    if (!IntervalsAreSeparate(start, end, shadow_beg, shadow_end)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool AsanInterceptsSignal(int signum) {
+  return (signum == SIGSEGV || signum == SIGBUS) && FLAG_handle_segv;
+}
+
+static void *asan_mmap(void *addr, size_t length, int prot, int flags,
+                int fd, uint64_t offset) {
+  return mmap(addr, length, prot, flags, fd, offset);
+}
+
+size_t AsanWrite(int fd, const void *buf, size_t count) {
+  return write(fd, buf, count);
+}
+
+void *AsanMmapSomewhereOrDie(size_t size, const char *mem_type) {
+  size = RoundUpTo(size, kPageSize);
+  void *res = asan_mmap(0, size,
+                        PROT_READ | PROT_WRITE,
+                        MAP_PRIVATE | MAP_ANON, -1, 0);
+  if (res == (void*)-1) {
+    OutOfMemoryMessageAndDie(mem_type, size);
+  }
+  return res;
+}
+
+void *AsanMmapFixedNoReserve(uintptr_t fixed_addr, size_t size) {
+  return asan_mmap((void*)fixed_addr, size,
+                   PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE,
+                   0, 0);
+}
+
+void *AsanMprotect(uintptr_t fixed_addr, size_t size) {
+  return asan_mmap((void*)fixed_addr, size,
+                   PROT_NONE,
+                   MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE,
+                   0, 0);
+}
+
+void AsanUnmapOrDie(void *addr, size_t size) {
+  if (!addr || !size) return;
+  int res = munmap(addr, size);
+  if (res != 0) {
+    Report("Failed to unmap\n");
+    AsanDie();
+  }
+}
+
+int AsanOpenReadonly(const char* filename) {
+  return open(filename, O_RDONLY);
+}
+
+const char *AsanGetEnv(const char *name) {
+  // Linear search of the process environment for an entry "name=value";
+  // returns a pointer to the value, or NULL when the variable is not set.
+  char ***env_ptr = _NSGetEnviron();
+  CHECK(env_ptr);
+  char **environ = *env_ptr;
+  CHECK(environ);
+  const size_t name_len = internal_strlen(name);
+  for (; *environ != NULL; environ++) {
+    const char *entry = *environ;
+    // An entry shorter than "name=" cannot match.
+    if (internal_strlen(entry) <= name_len) continue;
+    if (internal_memcmp(entry, name, name_len) != 0) continue;
+    if (entry[name_len] == '=') {
+      return entry + name_len + 1;  // String starting after =.
+    }
+  }
+  return NULL;
+}
+
+size_t AsanRead(int fd, void *buf, size_t count) {
+  return read(fd, buf, count);
+}
+
+int AsanClose(int fd) {
+  return close(fd);
+}
+
+AsanProcMaps::AsanProcMaps() {
+  Reset();
+}
+
+AsanProcMaps::~AsanProcMaps() {
+}
+
+// More information about Mach-O headers can be found in mach-o/loader.h
+// Each Mach-O image has a header (mach_header or mach_header_64) starting with
+// a magic number, and a list of linker load commands directly following the
+// header.
+// A load command is at least two 32-bit words: the command type and the
+// command size in bytes. We're interested only in segment load commands
+// (LC_SEGMENT and LC_SEGMENT_64), which tell that a part of the file is mapped
+// into the task's address space.
+// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
+// segment_command_64 correspond to the memory address, memory size and the
+// file offset of the current memory segment.
+// Because these fields are taken from the images as is, one needs to add
+// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
+
+void AsanProcMaps::Reset() {
+  // Count down from the top.
+  // TODO(glider): as per man 3 dyld, iterating over the headers with
+  // _dyld_image_count is thread-unsafe. We need to register callbacks for
+  // adding and removing images which will invalidate the AsanProcMaps state.
+  current_image_ = _dyld_image_count();
+  current_load_cmd_count_ = -1;
+  current_load_cmd_addr_ = NULL;
+  current_magic_ = 0;
+}
+
+// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
+// Google Perftools, http://code.google.com/p/google-perftools.
+
+// Consumes one load command of the current image. For a segment load command,
+// returns true and stores its slid start/end, file offset and image name via
+// the non-NULL out-parameters. Segment addresses are not necessarily sorted.
+template<uint32_t kLCSegment, typename SegmentCommand>
+bool AsanProcMaps::NextSegmentLoad(
+    uintptr_t *start, uintptr_t *end, uintptr_t *offset,
+    char filename[], size_t filename_size) {
+  const load_command *lc = (const load_command *)current_load_cmd_addr_;
+  current_load_cmd_addr_ += lc->cmdsize;
+  if (lc->cmd != kLCSegment) return false;
+  const SegmentCommand *sc = (const SegmentCommand *)lc;
+  const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image_);
+  const char *name = _dyld_get_image_name(current_image_);
+  const uintptr_t lo = sc->vmaddr + dlloff, hi = lo + sc->vmsize;
+  const uintptr_t off = sc->fileoff;
+  if (start) *start = lo;
+  if (end) *end = hi;
+  if (offset) *offset = off;
+  if (filename && filename_size > 0) {
+    REAL(strncpy)(filename, name, filename_size);
+    filename[filename_size - 1] = '\0';  // strncpy does not always terminate.
+  }
+  if (FLAG_v >= 4)  // Log the local copies: the out-parameters may be NULL.
+    Report("LC_SEGMENT: %p--%p %s+%p\n", lo, hi, name, off);
+  return true;
+}
+
+bool AsanProcMaps::Next(uintptr_t *start, uintptr_t *end,
+                        uintptr_t *offset, char filename[],
+                        size_t filename_size) {
+  // Resumable walk over the images, from the last one down to image 0.
+  // current_load_cmd_count_ < 0 means that no image is currently set up.
+  for (; current_image_ >= 0; current_image_--) {
+    const mach_header* hdr = _dyld_get_image_header(current_image_);
+    if (!hdr) continue;
+    if (current_load_cmd_count_ < 0) {
+      // Set up for this image.
+      current_magic_ = hdr->magic;
+      switch (current_magic_) {
+#ifdef MH_MAGIC_64
+        case MH_MAGIC_64: {
+          current_load_cmd_addr_ = (char*)hdr + sizeof(mach_header_64);
+          break;
+        }
+#endif
+        case MH_MAGIC: {
+          current_load_cmd_addr_ = (char*)hdr + sizeof(mach_header);
+          break;
+        }
+        default: continue;  // Unknown magic: skip, leaving the state unset.
+      }
+      current_load_cmd_count_ = hdr->ncmds;
+    }
+
+    while (current_load_cmd_count_ > 0) {
+      // Account for the command before a possible early return, so that the
+      // counter stays in sync with current_load_cmd_addr_ across calls.
+      current_load_cmd_count_--;
+      bool found = false;
+      switch (current_magic_) {
+#ifdef MH_MAGIC_64
+        case MH_MAGIC_64:
+          found = NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
+              start, end, offset, filename, filename_size);
+          break;
+#endif
+        case MH_MAGIC:
+          found = NextSegmentLoad<LC_SEGMENT, struct segment_command>(
+              start, end, offset, filename, filename_size);
+          break;
+      }
+      if (found) return true;
+    }
+    // No more load commands in this image; the next one is set up afresh.
+    current_load_cmd_count_ = -1;
+  }
+  return false;
+}
+
+bool AsanProcMaps::GetObjectNameAndOffset(uintptr_t addr, uintptr_t *offset,
+                                          char filename[],
+                                          size_t filename_size) {
+  return IterateForObjectNameAndOffset(addr, offset, filename, filename_size);
+}
+
+void AsanThread::SetThreadStackTopAndBottom() {
+  size_t stacksize = pthread_get_stacksize_np(pthread_self());
+  void *stackaddr = pthread_get_stackaddr_np(pthread_self());
+  stack_top_ = (uintptr_t)stackaddr;
+  stack_bottom_ = stack_top_ - stacksize;
+  int local;
+  CHECK(AddrIsInStack((uintptr_t)&local));
+}
+
+AsanLock::AsanLock(LinkerInitialized) {
+  // We assume that OS_SPINLOCK_INIT is zero
+}
+
+void AsanLock::Lock() {
+  CHECK(sizeof(OSSpinLock) <= sizeof(opaque_storage_));
+  CHECK(OS_SPINLOCK_INIT == 0);
+  CHECK(owner_ != (uintptr_t)pthread_self());
+  OSSpinLockLock((OSSpinLock*)&opaque_storage_);
+  CHECK(!owner_);
+  owner_ = (uintptr_t)pthread_self();
+}
+
+void AsanLock::Unlock() {
+  CHECK(owner_ == (uintptr_t)pthread_self());
+  owner_ = 0;
+  OSSpinLockUnlock((OSSpinLock*)&opaque_storage_);
+}
+
+void AsanStackTrace::GetStackTrace(size_t max_s, uintptr_t pc, uintptr_t bp) {
+  size = 0;
+  trace[0] = pc;
+  if ((max_s) > 1) {
+    max_size = max_s;
+    FastUnwindStack(pc, bp);
+  }
+}
+
+// The range of pages to be used for escape islands.
+// TODO(glider): instead of mapping a fixed range we must find a range of
+// unmapped pages in vmmap and take them.
+// These constants were chosen empirically and may not work if the shadow
+// memory layout changes. Unfortunately they do necessarily depend on
+// kHighMemBeg or kHighMemEnd.
+static void *island_allocator_pos = NULL;
+
+#if __WORDSIZE == 32
+# define kIslandEnd (0xffdf0000 - kPageSize)
+# define kIslandBeg (kIslandEnd - 256 * kPageSize)
+#else
+# define kIslandEnd (0x7fffffdf0000 - kPageSize)
+# define kIslandBeg (kIslandEnd - 256 * kPageSize)
+#endif
+
+extern "C"
+mach_error_t __interception_allocate_island(void **ptr,
+                                            size_t unused_size,
+                                            void *unused_hint) {
+  // Hands out one page per call from the range [kIslandBeg, kIslandEnd),
+  // which is mapped on first use. Returns KERN_NO_SPACE on failure.
+  if (!island_allocator_pos) {
+    void *mem = asan_mmap((void*)kIslandBeg, kIslandEnd - kIslandBeg,
+                          PROT_READ | PROT_WRITE | PROT_EXEC,
+                          MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+    // Do not cache a failed mapping: later calls would hand it out.
+    if (mem != (void*)kIslandBeg) return KERN_NO_SPACE;
+    if (FLAG_v) {
+      Report("Mapped pages %p--%p for branch islands.\n",
+             kIslandBeg, kIslandEnd);
+    }
+    // Should not be very performance-critical.
+    internal_memset(mem, 0xCC, kIslandEnd - kIslandBeg);
+    island_allocator_pos = mem;
+  }
+  // Every page of the range has already been handed out.
+  if ((uintptr_t)island_allocator_pos >= kIslandEnd) return KERN_NO_SPACE;
+  *ptr = island_allocator_pos;
+  island_allocator_pos = (char*)island_allocator_pos + kPageSize;
+  if (FLAG_v) Report("Branch island allocated at %p\n", *ptr);
+  return err_none;
+}
+
+extern "C"
+mach_error_t __interception_deallocate_island(void *ptr) {
+  // Do nothing.
+  // TODO(glider): allow to free and reuse the island memory.
+  return err_none;
+}
+
+// Support for the following functions from libdispatch on Mac OS:
+//   dispatch_async_f()
+//   dispatch_async()
+//   dispatch_sync_f()
+//   dispatch_sync()
+//   dispatch_after_f()
+//   dispatch_after()
+//   dispatch_group_async_f()
+//   dispatch_group_async()
+// TODO(glider): libdispatch API contains other functions that we don't support
+// yet.
+//
+// dispatch_sync() and dispatch_sync_f() are synchronous, although chances are
+// they can cause jobs to run on a thread different from the current one.
+// TODO(glider): if so, we need a test for this (otherwise we should remove
+// them).
+//
+// The following functions use dispatch_barrier_async_f() (which isn't a library
+// function but is exported) and are thus supported:
+//   dispatch_source_set_cancel_handler_f()
+//   dispatch_source_set_cancel_handler()
+//   dispatch_source_set_event_handler_f()
+//   dispatch_source_set_event_handler()
+//
+// The reference manual for Grand Central Dispatch is available at
+//   http://developer.apple.com/library/mac/#documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html
+// The implementation details are at
+//   http://libdispatch.macosforge.org/trac/browser/trunk/src/queue.c
+
+typedef void* pthread_workqueue_t;
+typedef void* pthread_workitem_handle_t;
+
+typedef void* dispatch_group_t;
+typedef void* dispatch_queue_t;
+typedef uint64_t dispatch_time_t;
+typedef void (*dispatch_function_t)(void *block);
+typedef void* (*worker_t)(void *block);
+
+// A wrapper for the ObjC blocks used to support libdispatch.
+typedef struct {
+  void *block;
+  dispatch_function_t func;
+  int parent_tid;
+} asan_block_context_t;
+
+// We use extern declarations of libdispatch functions here instead
+// of including <dispatch/dispatch.h>. This header is not present on
+// Mac OS X Leopard and earlier, and although we don't expect ASan to
+// work on legacy systems, it's bad to break the build of
+// LLVM compiler-rt there.
+extern "C" {
+void dispatch_async_f(dispatch_queue_t dq, void *ctxt,
+                      dispatch_function_t func);
+void dispatch_sync_f(dispatch_queue_t dq, void *ctxt,
+                     dispatch_function_t func);
+void dispatch_after_f(dispatch_time_t when, dispatch_queue_t dq, void *ctxt,
+                      dispatch_function_t func);
+void dispatch_barrier_async_f(dispatch_queue_t dq, void *ctxt,
+                              dispatch_function_t func);
+void dispatch_group_async_f(dispatch_group_t group, dispatch_queue_t dq,
+                            void *ctxt, dispatch_function_t func);
+int pthread_workqueue_additem_np(pthread_workqueue_t workq,
+    void *(*workitem_func)(void *), void * workitem_arg,
+    pthread_workitem_handle_t * itemhandlep, unsigned int *gencountp);
+}  // extern "C"
+
+extern "C"
+void asan_dispatch_call_block_and_release(void *block) {
+  GET_STACK_TRACE_HERE(kStackTraceMax);
+  asan_block_context_t *context = (asan_block_context_t*)block;
+  if (FLAG_v >= 2) {
+    Report("asan_dispatch_call_block_and_release(): "
+           "context: %p, pthread_self: %p\n",
+           block, pthread_self());
+  }
+  AsanThread *t = asanThreadRegistry().GetCurrent();
+  if (!t) {
+    t = AsanThread::Create(context->parent_tid, NULL, NULL, &stack);
+    asanThreadRegistry().RegisterThread(t);
+    t->Init();
+    asanThreadRegistry().SetCurrent(t);
+  }
+  // Call the original dispatcher for the block.
+  context->func(context->block);
+  asan_free(context, &stack);
+}
+
+}  // namespace __asan
+
+using namespace __asan;  // NOLINT
+
+// Wrap |ctxt| and |func| into an asan_block_context_t.
+// The caller retains control of the allocated context.
+extern "C"
+asan_block_context_t *alloc_asan_context(void *ctxt, dispatch_function_t func,
+                                         AsanStackTrace *stack) {
+  asan_block_context_t *asan_ctxt =
+      (asan_block_context_t*) asan_malloc(sizeof(asan_block_context_t), stack);
+  asan_ctxt->block = ctxt;
+  asan_ctxt->func = func;
+  asan_ctxt->parent_tid = asanThreadRegistry().GetCurrentTidOrMinusOne();
+  return asan_ctxt;
+}
+
+// TODO(glider): can we reduce code duplication by introducing a macro?
+INTERCEPTOR(void, dispatch_async_f, dispatch_queue_t dq, void *ctxt,
+                                    dispatch_function_t func) {
+  GET_STACK_TRACE_HERE(kStackTraceMax);
+  asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack);
+  if (FLAG_v >= 2) {
+    Report("dispatch_async_f(): context: %p, pthread_self: %p\n",
+        asan_ctxt, pthread_self());
+    PRINT_CURRENT_STACK();
+  }
+  return REAL(dispatch_async_f)(dq, (void*)asan_ctxt,
+                                asan_dispatch_call_block_and_release);
+}
+
+INTERCEPTOR(void, dispatch_sync_f, dispatch_queue_t dq, void *ctxt,
+                                   dispatch_function_t func) {
+  GET_STACK_TRACE_HERE(kStackTraceMax);
+  asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack);
+  if (FLAG_v >= 2) {
+    Report("dispatch_sync_f(): context: %p, pthread_self: %p\n",
+        asan_ctxt, pthread_self());
+    PRINT_CURRENT_STACK();
+  }
+  return REAL(dispatch_sync_f)(dq, (void*)asan_ctxt,
+                               asan_dispatch_call_block_and_release);
+}
+
+INTERCEPTOR(void, dispatch_after_f, dispatch_time_t when,
+                                    dispatch_queue_t dq, void *ctxt,
+                                    dispatch_function_t func) {
+  GET_STACK_TRACE_HERE(kStackTraceMax);
+  asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack);
+  if (FLAG_v >= 2) {
+    Report("dispatch_after_f: %p\n", asan_ctxt);
+    PRINT_CURRENT_STACK();
+  }
+  return REAL(dispatch_after_f)(when, dq, (void*)asan_ctxt,
+                                asan_dispatch_call_block_and_release);
+}
+
+INTERCEPTOR(void, dispatch_barrier_async_f, dispatch_queue_t dq, void *ctxt,
+                                            dispatch_function_t func) {
+  GET_STACK_TRACE_HERE(kStackTraceMax);
+  asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack);
+  if (FLAG_v >= 2) {
+    Report("dispatch_barrier_async_f(): context: %p, pthread_self: %p\n",
+           asan_ctxt, pthread_self());
+    PRINT_CURRENT_STACK();
+  }
+  REAL(dispatch_barrier_async_f)(dq, (void*)asan_ctxt,
+                                 asan_dispatch_call_block_and_release);
+}
+
+INTERCEPTOR(void, dispatch_group_async_f, dispatch_group_t group,
+                                          dispatch_queue_t dq, void *ctxt,
+                                          dispatch_function_t func) {
+  GET_STACK_TRACE_HERE(kStackTraceMax);
+  asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack);
+  if (FLAG_v >= 2) {
+    Report("dispatch_group_async_f(): context: %p, pthread_self: %p\n",
+           asan_ctxt, pthread_self());
+    PRINT_CURRENT_STACK();
+  }
+  REAL(dispatch_group_async_f)(group, dq, (void*)asan_ctxt,
+                               asan_dispatch_call_block_and_release);
+}
+
+// The following stuff has been extremely helpful while looking for the
+// unhandled functions that spawned jobs on Chromium shutdown. If the verbosity
+// level is 2 or greater, we wrap pthread_workqueue_additem_np() in order to
+// find the points of worker thread creation (each of such threads may be used
+// to run several tasks, that's why this is not enough to support the whole
+// libdispatch API).
+extern "C"
+void *wrap_workitem_func(void *arg) {
+  if (FLAG_v >= 2) {
+    Report("wrap_workitem_func: %p, pthread_self: %p\n", arg, pthread_self());
+  }
+  asan_block_context_t *ctxt = (asan_block_context_t*)arg;
+  worker_t fn = (worker_t)(ctxt->func);
+  void *result =  fn(ctxt->block);
+  GET_STACK_TRACE_HERE(kStackTraceMax);
+  asan_free(arg, &stack);
+  return result;
+}
+
+INTERCEPTOR(int, pthread_workqueue_additem_np, pthread_workqueue_t workq,
+    void *(*workitem_func)(void *), void * workitem_arg,
+    pthread_workitem_handle_t * itemhandlep, unsigned int *gencountp) {
+  GET_STACK_TRACE_HERE(kStackTraceMax);
+  asan_block_context_t *asan_ctxt =
+      (asan_block_context_t*) asan_malloc(sizeof(asan_block_context_t), &stack);
+  asan_ctxt->block = workitem_arg;
+  asan_ctxt->func = (dispatch_function_t)workitem_func;
+  asan_ctxt->parent_tid = asanThreadRegistry().GetCurrentTidOrMinusOne();
+  if (FLAG_v >= 2) {
+    Report("pthread_workqueue_additem_np: %p\n", asan_ctxt);
+    PRINT_CURRENT_STACK();
+  }
+  return REAL(pthread_workqueue_additem_np)(workq, wrap_workitem_func,
+                                            asan_ctxt, itemhandlep,
+                                            gencountp);
+}
+
+// CF_RC_BITS, the layout of CFRuntimeBase and __CFStrIsConstant are internal
+// and subject to change in further CoreFoundation versions. Apple does not
+// guarantee any binary compatibility from release to release.
+
+// See http://opensource.apple.com/source/CF/CF-635.15/CFInternal.h
+#if defined(__BIG_ENDIAN__)
+#define CF_RC_BITS 0
+#endif
+
+#if defined(__LITTLE_ENDIAN__)
+#define CF_RC_BITS 3
+#endif
+
+// See http://opensource.apple.com/source/CF/CF-635.15/CFRuntime.h
+typedef struct __CFRuntimeBase {
+  uintptr_t _cfisa;
+  uint8_t _cfinfo[4];
+#if __LP64__
+  uint32_t _rc;
+#endif
+} CFRuntimeBase;
+
+// See http://opensource.apple.com/source/CF/CF-635.15/CFString.c
+int __CFStrIsConstant(CFStringRef str) {
+  CFRuntimeBase *base = (CFRuntimeBase*)str;
+#if __LP64__
+  return base->_rc == 0;
+#else
+  return (base->_cfinfo[CF_RC_BITS]) == 0;
+#endif
+}
+
+INTERCEPTOR(CFStringRef, CFStringCreateCopy, CFAllocatorRef alloc,
+                                             CFStringRef str) {
+  if (__CFStrIsConstant(str)) {
+    return str;
+  } else {
+    return REAL(CFStringCreateCopy)(alloc, str);
+  }
+}
+
+namespace __asan {
+
+void InitializeMacInterceptors() {
+  CHECK(INTERCEPT_FUNCTION(dispatch_async_f));
+  CHECK(INTERCEPT_FUNCTION(dispatch_sync_f));
+  CHECK(INTERCEPT_FUNCTION(dispatch_after_f));
+  CHECK(INTERCEPT_FUNCTION(dispatch_barrier_async_f));
+  CHECK(INTERCEPT_FUNCTION(dispatch_group_async_f));
+  // We don't need to intercept pthread_workqueue_additem_np() to support the
+  // libdispatch API, but it helps us to debug the unsupported functions. Let's
+  // intercept it only during verbose runs.
+  if (FLAG_v >= 2) {
+    CHECK(INTERCEPT_FUNCTION(pthread_workqueue_additem_np));
+  }
+  // Normally CFStringCreateCopy should not copy constant CF strings.
+  // Replacing the default CFAllocator causes constant strings to be copied
+  // rather than just returned, which leads to bugs in big applications like
+  // Chromium and WebKit, see
+  // http://code.google.com/p/address-sanitizer/issues/detail?id=10
+  // Until this problem is fixed we need to check that the string is
+  // non-constant before calling CFStringCreateCopy.
+  CHECK(INTERCEPT_FUNCTION(CFStringCreateCopy));
+}
+
+}  // namespace __asan
+
+#endif  // __APPLE__
diff --git a/lib/asan/asan_malloc_linux.cc b/lib/asan/asan_malloc_linux.cc
new file mode 100644
index 0000000..84ef929
--- /dev/null
+++ b/lib/asan/asan_malloc_linux.cc
@@ -0,0 +1,132 @@
+//===-- asan_malloc_linux.cc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Linux-specific malloc interception.
+// We simply define functions like malloc, free, realloc, etc.
+// They will replace the corresponding libc functions automagically.
+//===----------------------------------------------------------------------===//
+#ifdef __linux__
+
+#include "asan_allocator.h"
+#include "asan_interceptors.h"
+#include "asan_internal.h"
+#include "asan_stack.h"
+
+#include <malloc.h>
+
+#ifdef ANDROID
+struct MallocDebug {
+  void* (*malloc)(size_t bytes);
+  void  (*free)(void* mem);
+  void* (*calloc)(size_t n_elements, size_t elem_size);
+  void* (*realloc)(void* oldMem, size_t bytes);
+  void* (*memalign)(size_t alignment, size_t bytes);
+};
+
+const MallocDebug asan_malloc_dispatch ALIGNED(32) = {
+  malloc, free, calloc, realloc, memalign
+};
+
+extern "C" const MallocDebug* __libc_malloc_dispatch;
+
+namespace __asan {
+void ReplaceSystemMalloc() {
+  __libc_malloc_dispatch = &asan_malloc_dispatch;
+}
+}  // namespace __asan
+
+#else  // ANDROID
+
+namespace __asan {
+void ReplaceSystemMalloc() {
+}
+}  // namespace __asan
+#endif  // ANDROID
+
+// ---------------------- Replacement functions ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+INTERCEPTOR(void, free, void *ptr) {
+  GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+  asan_free(ptr, &stack);
+}
+
+INTERCEPTOR(void, cfree, void *ptr) {
+  GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+  asan_free(ptr, &stack);
+}
+
+INTERCEPTOR(void*, malloc, size_t size) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_malloc(size, &stack);
+}
+
+INTERCEPTOR(void*, calloc, size_t nmemb, size_t size) {
+  if (!asan_inited) {
+    // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
+    const size_t kCallocPoolSize = 1024;
+    static uintptr_t calloc_memory_for_dlsym[kCallocPoolSize];
+    static size_t allocated;
+    size_t size_in_words = ((nmemb * size) + kWordSize - 1) / kWordSize;
+    void *mem = (void*)&calloc_memory_for_dlsym[allocated];
+    allocated += size_in_words;
+    CHECK(allocated < kCallocPoolSize);
+    return mem;
+  }
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_calloc(nmemb, size, &stack);
+}
+
+INTERCEPTOR(void*, realloc, void *ptr, size_t size) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_realloc(ptr, size, &stack);
+}
+
+INTERCEPTOR(void*, memalign, size_t boundary, size_t size) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_memalign(boundary, size, &stack);
+}
+
+INTERCEPTOR(void*, __libc_memalign, size_t align, size_t s)
+  ALIAS("memalign");
+
+INTERCEPTOR(size_t, malloc_usable_size, void *ptr) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_malloc_usable_size(ptr, &stack);
+}
+
+INTERCEPTOR(struct mallinfo, mallinfo) {
+  struct mallinfo res;
+  REAL(memset)(&res, 0, sizeof(res));
+  return res;
+}
+
+INTERCEPTOR(int, mallopt, int cmd, int value) {
+  return -1;
+}
+
+INTERCEPTOR(int, posix_memalign, void **memptr, size_t alignment, size_t size) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  // Printf("posix_memalign: %zx %zu\n", alignment, size);
+  return asan_posix_memalign(memptr, alignment, size, &stack);
+}
+
+INTERCEPTOR(void*, valloc, size_t size) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_valloc(size, &stack);
+}
+
+INTERCEPTOR(void*, pvalloc, size_t size) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_pvalloc(size, &stack);
+}
+
+#endif  // __linux__
diff --git a/lib/asan/asan_malloc_mac.cc b/lib/asan/asan_malloc_mac.cc
new file mode 100644
index 0000000..14d6385
--- /dev/null
+++ b/lib/asan/asan_malloc_mac.cc
@@ -0,0 +1,394 @@
+//===-- asan_malloc_mac.cc --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Mac-specific malloc interception.
+//===----------------------------------------------------------------------===//
+
+#ifdef __APPLE__
+
+#include <AvailabilityMacros.h>
+#include <CoreFoundation/CFBase.h>
+#include <malloc/malloc.h>
+#include <setjmp.h>
+
+#include "asan_allocator.h"
+#include "asan_interceptors.h"
+#include "asan_internal.h"
+#include "asan_stack.h"
+
+// Similar code is used in Google Perftools,
+// http://code.google.com/p/google-perftools.
+
+// ---------------------- Replacement functions ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+// The free() implementation provided by OS X calls malloc_zone_from_ptr()
+// to find the owner of |ptr|. If the result is NULL, an invalid free() is
+// reported. Our implementation falls back to asan_free() in this case
+// in order to print an ASan-style report.
+extern "C"
+void free(void *ptr) {
+  malloc_zone_t *owner = malloc_zone_from_ptr(ptr);
+  if (!owner) {
+    // No zone claims |ptr|: let ASan handle it so that an ASan-style report
+    // is printed for the invalid free().
+    GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+    asan_free(ptr, &stack);
+    return;
+  }
+#if defined(MAC_OS_X_VERSION_10_6) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
+  // Zones of version 6 or newer may provide a sized deallocation entry point;
+  // use it when it is present.
+  if ((owner->version >= 6) && (owner->free_definite_size)) {
+    owner->free_definite_size(owner, ptr, malloc_size(ptr));
+    return;
+  }
+#endif
+  malloc_zone_free(owner, ptr);
+}
+
+// TODO(glider): do we need both zones?
+static malloc_zone_t *system_malloc_zone = NULL;
+static malloc_zone_t *system_purgeable_zone = NULL;
+
+// We need to provide wrappers around all the libc functions.
+namespace {
+// TODO(glider): the mz_* functions should be united with the Linux wrappers,
+// as they are basically copied from there.
+// size() callback of the ASan zone: returns 0 for pointers that the original
+// system zone recognizes, otherwise the size known to ASan.
+size_t mz_size(malloc_zone_t* zone, const void* ptr) {
+  // Fast path: ask the original malloc zone directly. Calling
+  // malloc_zone_from_ptr() here is not an option, because it in turn
+  // calls our mz_size().
+  const bool owned_by_system_zone =
+      system_malloc_zone &&
+      (system_malloc_zone->size)(system_malloc_zone, ptr);
+  return owned_by_system_zone ? 0 : asan_mz_size(ptr);
+}
+
+// malloc() callback of the ASan zone.
+void *mz_malloc(malloc_zone_t *zone, size_t size) {
+  if (asan_inited) {
+    GET_STACK_TRACE_HERE_FOR_MALLOC;
+    return asan_malloc(size, &stack);
+  }
+  // ASan is not initialized yet: serve the request from the system zone.
+  CHECK(system_malloc_zone);
+  return malloc_zone_malloc(system_malloc_zone, size);
+}
+
+// allocate() callback of the CFAllocator context built in
+// ReplaceSystemMalloc(); |hint| and |info| are not used.
+void *cf_malloc(CFIndex size, CFOptionFlags hint, void *info) {
+  if (asan_inited) {
+    GET_STACK_TRACE_HERE_FOR_MALLOC;
+    return asan_malloc(size, &stack);
+  }
+  // ASan is not initialized yet: serve the request from the system zone.
+  CHECK(system_malloc_zone);
+  return malloc_zone_malloc(system_malloc_zone, size);
+}
+
+// calloc() callback of the ASan zone. Before ASan is initialized the request
+// is served from a small static pool; afterwards it goes to asan_calloc().
+void *mz_calloc(malloc_zone_t *zone, size_t nmemb, size_t size) {
+  if (!asan_inited) {
+    // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
+    // The pool has static storage, so the memory handed out is already
+    // zero-filled; it is never returned to the pool.
+    const size_t kCallocPoolSize = 1024;
+    static uintptr_t calloc_memory_for_dlsym[kCallocPoolSize];
+    static size_t allocated;
+    // Reject requests whose rounded-up byte count does not fit in size_t:
+    // a wrapped product would hand out a block smaller than requested.
+    CHECK(size == 0 || nmemb <= ((size_t)-1 - (kWordSize - 1)) / size);
+    size_t size_in_words = ((nmemb * size) + kWordSize - 1) / kWordSize;
+    // Verify that the request fits before moving the pool cursor, so the
+    // cursor itself can never wrap around.
+    CHECK(size_in_words < kCallocPoolSize - allocated);
+    void *mem = (void*)&calloc_memory_for_dlsym[allocated];
+    allocated += size_in_words;
+    return mem;
+  }
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_calloc(nmemb, size, &stack);
+}
+
+// valloc() callback of the ASan zone. Uses the system zone until ASan is
+// initialized; afterwards requests kPageSize-aligned memory from ASan.
+void *mz_valloc(malloc_zone_t *zone, size_t size) {
+  if (!asan_inited) {
+    CHECK(system_malloc_zone);
+    return malloc_zone_valloc(system_malloc_zone, size);
+  }
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_memalign(kPageSize, size, &stack);
+}
+
+// Debugging aid: prints which malloc zone (if any) claims |ptr|.
+void print_zone_for_ptr(void *ptr) {
+  malloc_zone_t *owner = malloc_zone_from_ptr(ptr);
+  if (!owner) {
+    Printf("malloc_zone_from_ptr(%p) = NULL\n", ptr);
+    return;
+  }
+  if (!owner->zone_name) {
+    Printf("malloc_zone_from_ptr(%p) = %p, which doesn't have a name\n",
+           ptr, owner);
+    return;
+  }
+  Printf("malloc_zone_from_ptr(%p) = %p, which is %s\n",
+         ptr, owner, owner->zone_name);
+}
+
+// TODO(glider): the allocation callbacks need to be refactored.
+// free() callback of the ASan zone. Pointers owned by the system purgeable
+// zone are handed back to that zone, pointers known to ASan are freed through
+// ASan, and anything else is reported and leaked.
+void mz_free(malloc_zone_t *zone, void *ptr) {
+  if (!ptr) return;
+  malloc_zone_t *orig_zone = malloc_zone_from_ptr(ptr);
+  // For some reason Chromium calls mz_free() for pointers that belong to
+  // DefaultPurgeableMallocZone instead of asan_zone. We might want to
+  // fix this someday.
+  // system_purgeable_zone stays NULL unless ReplaceSystemMalloc() obtained
+  // it (10.6+ builds only), and malloc_zone_from_ptr() yields NULL for
+  // pointers that no zone claims, so rule out the NULL == NULL match before
+  // dereferencing the zone.
+  if (system_purgeable_zone && orig_zone == system_purgeable_zone) {
+    system_purgeable_zone->free(system_purgeable_zone, ptr);
+    return;
+  }
+  if (asan_mz_size(ptr)) {
+    GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+    asan_free(ptr, &stack);
+  } else {
+    // Let us just leak this memory for now.
+    Printf("mz_free(%p) -- attempting to free unallocated memory.\n"
+           "AddressSanitizer is ignoring this error on Mac OS now.\n", ptr);
+    print_zone_for_ptr(ptr);
+    GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+    stack.PrintStack();
+    return;
+  }
+}
+
+// deallocate() callback of the CFAllocator context built in
+// ReplaceSystemMalloc(); |info| is not used. Mirrors mz_free().
+void cf_free(void *ptr, void *info) {
+  if (!ptr) return;
+  malloc_zone_t *orig_zone = malloc_zone_from_ptr(ptr);
+  // Same workaround as in mz_free(): pointers that belong to
+  // DefaultPurgeableMallocZone instead of asan_zone are handed back to the
+  // purgeable zone. We might want to fix this someday.
+  // system_purgeable_zone stays NULL unless ReplaceSystemMalloc() obtained
+  // it (10.6+ builds only), and malloc_zone_from_ptr() yields NULL for
+  // pointers that no zone claims, so rule out the NULL == NULL match before
+  // dereferencing the zone.
+  if (system_purgeable_zone && orig_zone == system_purgeable_zone) {
+    system_purgeable_zone->free(system_purgeable_zone, ptr);
+    return;
+  }
+  if (asan_mz_size(ptr)) {
+    GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+    asan_free(ptr, &stack);
+  } else {
+    // Let us just leak this memory for now.
+    Printf("cf_free(%p) -- attempting to free unallocated memory.\n"
+           "AddressSanitizer is ignoring this error on Mac OS now.\n", ptr);
+    print_zone_for_ptr(ptr);
+    GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+    stack.PrintStack();
+    return;
+  }
+}
+
+// realloc() callback of the ASan zone. A NULL |ptr| behaves like malloc();
+// a pointer unknown to ASan is a fatal error.
+void *mz_realloc(malloc_zone_t *zone, void *ptr, size_t size) {
+  if (!ptr) {
+    GET_STACK_TRACE_HERE_FOR_MALLOC;
+    return asan_malloc(size, &stack);
+  }
+  if (asan_mz_size(ptr)) {
+    GET_STACK_TRACE_HERE_FOR_MALLOC;
+    return asan_realloc(ptr, size, &stack);
+  }
+  // We can't recover from reallocating an unknown address, because
+  // this would require reading at most |size| bytes from
+  // potentially unaccessible memory.
+  Printf("mz_realloc(%p) -- attempting to realloc unallocated memory.\n"
+         "This is an unrecoverable problem, exiting now.\n", ptr);
+  print_zone_for_ptr(ptr);
+  GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+  stack.PrintStack();
+  ShowStatsAndAbort();
+  return NULL;  // unreachable
+}
+
+// reallocate() callback of the CFAllocator context built in
+// ReplaceSystemMalloc(); |hint| and |info| are not used. A NULL |ptr| behaves
+// like malloc(); a pointer unknown to ASan is a fatal error.
+void *cf_realloc(void *ptr, CFIndex size, CFOptionFlags hint, void *info) {
+  if (!ptr) {
+    GET_STACK_TRACE_HERE_FOR_MALLOC;
+    return asan_malloc(size, &stack);
+  }
+  if (asan_mz_size(ptr)) {
+    GET_STACK_TRACE_HERE_FOR_MALLOC;
+    return asan_realloc(ptr, size, &stack);
+  }
+  // We can't recover from reallocating an unknown address, because
+  // this would require reading at most |size| bytes from
+  // potentially unaccessible memory.
+  Printf("cf_realloc(%p) -- attempting to realloc unallocated memory.\n"
+         "This is an unrecoverable problem, exiting now.\n", ptr);
+  print_zone_for_ptr(ptr);
+  GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+  stack.PrintStack();
+  ShowStatsAndAbort();
+  return NULL;  // unreachable
+}
+
+// destroy() callback of the ASan zone: only logs the request, the zone is
+// left untouched.
+void mz_destroy(malloc_zone_t* zone) {
+  // A no-op -- we will not be destroyed!
+  Printf("mz_destroy() called -- ignoring\n");
+}
+  // from AvailabilityMacros.h
+#if defined(MAC_OS_X_VERSION_10_6) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
+// memalign() callback of the ASan zone (only built for 10.6+ SDKs). Uses the
+// system zone until ASan is initialized, then forwards to asan_memalign().
+void *mz_memalign(malloc_zone_t *zone, size_t align, size_t size) {
+  if (!asan_inited) {
+    CHECK(system_malloc_zone);
+    return malloc_zone_memalign(system_malloc_zone, align, size);
+  }
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_memalign(align, size, &stack);
+}
+
+// This function is currently unused, and we build with -Werror.
+#if 0
+void mz_free_definite_size(malloc_zone_t* zone, void *ptr, size_t size) {
+  // TODO(glider): check that |size| is valid.
+  UNIMPLEMENTED();
+}
+#endif
+#endif
+
+// malloc_introspection callbacks.  I'm not clear on what all of these do.
+// Introspection callback: enumeration of live allocations is not
+// implemented, so every call reports KERN_FAILURE.
+kern_return_t mi_enumerator(task_t task, void *,
+                            unsigned type_mask, vm_address_t zone_address,
+                            memory_reader_t reader,
+                            vm_range_recorder_t recorder) {
+  // Should enumerate all the pointers we have.  Seems like a lot of work.
+  return KERN_FAILURE;
+}
+
+// Introspection callback: returns |size| unchanged.
+size_t mi_good_size(malloc_zone_t *zone, size_t size) {
+  // I think it's always safe to return size, but we maybe could do better.
+  return size;
+}
+
+// Introspection callback: zone consistency checking is not implemented.
+// NOTE(review): UNIMPLEMENTED() presumably aborts, which would make the
+// return statement unreachable -- confirm against its definition.
+boolean_t mi_check(malloc_zone_t *zone) {
+  UNIMPLEMENTED();
+  return true;
+}
+
+// Introspection callback: printing the zone state is not implemented.
+void mi_print(malloc_zone_t *zone, boolean_t verbose) {
+  UNIMPLEMENTED();
+}
+
+// Introspection callback: intentionally empty.
+void mi_log(malloc_zone_t *zone, void *address) {
+  // I don't think we support anything like this
+}
+
+// Introspection callback: delegates to asan_mz_force_lock(); |zone| is unused.
+void mi_force_lock(malloc_zone_t *zone) {
+  asan_mz_force_lock();
+}
+
+// Introspection callback: delegates to asan_mz_force_unlock(); |zone| is
+// unused.
+void mi_force_unlock(malloc_zone_t *zone) {
+  asan_mz_force_unlock();
+}
+
+// This function is currently unused, and we build with -Werror.
+#if 0
+void mi_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) {
+  // TODO(csilvers): figure out how to fill these out
+  // TODO(glider): port this from tcmalloc when ready.
+  stats->blocks_in_use = 0;
+  stats->size_in_use = 0;
+  stats->max_size_in_use = 0;
+  stats->size_allocated = 0;
+}
+#endif
+
+#if defined(MAC_OS_X_VERSION_10_6) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
+// Introspection callback (only built for 10.6+ SDKs): lock state is not
+// tracked, so the zone is always reported as unlocked.
+boolean_t mi_zone_locked(malloc_zone_t *zone) {
+  // UNIMPLEMENTED();
+  return false;
+}
+#endif
+
+}  // unnamed namespace
+
+extern bool kCFUseCollectableAllocator;  // is GC on?
+
+namespace __asan {
+// Installs the ASan malloc zone as the process default zone and, when
+// FLAG_replace_cfallocator is set, installs an ASan-backed default
+// CFAllocator. The function:
+//   1. fills in the introspection table and the zone callback table;
+//   2. on 10.6+ SDKs upgrades the zone to version 6 and forces creation of
+//      the default purgeable zone;
+//   3. registers the ASan zone, then unregisters and re-registers the system
+//      default zone so that the ASan zone becomes the default;
+//   4. optionally replaces the default CFAllocator.
+// Both tables are function-local statics, so they outlive this call.
+void ReplaceSystemMalloc() {
+  static malloc_introspection_t asan_introspection;
+  // Ok to use internal_memset, these places are not performance-critical.
+  internal_memset(&asan_introspection, 0, sizeof(asan_introspection));
+
+  asan_introspection.enumerator = &mi_enumerator;
+  asan_introspection.good_size = &mi_good_size;
+  asan_introspection.check = &mi_check;
+  asan_introspection.print = &mi_print;
+  asan_introspection.log = &mi_log;
+  asan_introspection.force_lock = &mi_force_lock;
+  asan_introspection.force_unlock = &mi_force_unlock;
+
+  static malloc_zone_t asan_zone;
+  internal_memset(&asan_zone, 0, sizeof(malloc_zone_t));
+
+  // Start with a version 4 zone which is used for OS X 10.4 and 10.5.
+  asan_zone.version = 4;
+  asan_zone.zone_name = "asan";
+  asan_zone.size = &mz_size;
+  asan_zone.malloc = &mz_malloc;
+  asan_zone.calloc = &mz_calloc;
+  asan_zone.valloc = &mz_valloc;
+  asan_zone.free = &mz_free;
+  asan_zone.realloc = &mz_realloc;
+  asan_zone.destroy = &mz_destroy;
+  asan_zone.batch_malloc = NULL;
+  asan_zone.batch_free = NULL;
+  asan_zone.introspect = &asan_introspection;
+
+  // from AvailabilityMacros.h
+#if defined(MAC_OS_X_VERSION_10_6) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
+  // Switch to version 6 on OSX 10.6 to support memalign.
+  asan_zone.version = 6;
+  // No sized-free callback is installed.
+  asan_zone.free_definite_size = 0;
+  asan_zone.memalign = &mz_memalign;
+  asan_introspection.zone_locked = &mi_zone_locked;
+
+  // Request the default purgable zone to force its creation. The
+  // current default zone is registered with the purgable zone for
+  // doing tiny and small allocs.  Sadly, it assumes that the default
+  // zone is the szone implementation from OS X and will crash if it
+  // isn't.  By creating the zone now, this will be true and changing
+  // the default zone won't cause a problem.  (OS X 10.6 and higher.)
+  system_purgeable_zone = malloc_default_purgeable_zone();
+#endif
+
+  // Register the ASan zone. At this point, it will not be the
+  // default zone.
+  malloc_zone_register(&asan_zone);
+
+  // Unregister and reregister the default zone.  Unregistering swaps
+  // the specified zone with the last one registered which for the
+  // default zone makes the more recently registered zone the default
+  // zone.  The default zone is then re-registered to ensure that
+  // allocations made from it earlier will be handled correctly.
+  // Things are not guaranteed to work that way, but it's how they work now.
+  system_malloc_zone = malloc_default_zone();
+  malloc_zone_unregister(system_malloc_zone);
+  malloc_zone_register(system_malloc_zone);
+  // Make sure the default allocator was replaced.
+  CHECK(malloc_default_zone() == &asan_zone);
+
+  if (FLAG_replace_cfallocator) {
+    // Route CoreFoundation allocations through the cf_* wrappers as well.
+    static CFAllocatorContext asan_context =
+        { /*version*/ 0, /*info*/ &asan_zone,
+          /*retain*/ NULL, /*release*/ NULL,
+          /*copyDescription*/NULL,
+          /*allocate*/ &cf_malloc,
+          /*reallocate*/ &cf_realloc,
+          /*deallocate*/ &cf_free,
+          /*preferredSize*/ NULL };
+    CFAllocatorRef cf_asan =
+        CFAllocatorCreate(kCFAllocatorUseContext, &asan_context);
+    CFAllocatorSetDefault(cf_asan);
+  }
+}
+}  // namespace __asan
+
+#endif  // __APPLE__
diff --git a/lib/asan/asan_malloc_win.cc b/lib/asan/asan_malloc_win.cc
new file mode 100644
index 0000000..42ba8fe
--- /dev/null
+++ b/lib/asan/asan_malloc_win.cc
@@ -0,0 +1,141 @@
+//===-- asan_malloc_win.cc --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Windows-specific malloc interception.
+//===----------------------------------------------------------------------===//
+#ifdef _WIN32
+
+#include "asan_allocator.h"
+#include "asan_interceptors.h"
+#include "asan_internal.h"
+#include "asan_stack.h"
+
+#include "interception/interception.h"
+
+// ---------------------- Replacement functions ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+// FIXME: Simply defining functions with the same signature in *.obj
+// files overrides the standard functions in *.lib
+// This works well for simple helloworld-like tests but might need to be
+// revisited in the future.
+
+extern "C" {
+// Replacement for the CRT free(): releases |ptr| through the ASan allocator.
+void free(void *ptr) {
+  GET_STACK_TRACE_HERE_FOR_FREE(ptr);
+  return asan_free(ptr, &stack);
+}
+
+// Debug-CRT variant of free(): the extra int argument is ignored.
+void _free_dbg(void* ptr, int) {
+  free(ptr);
+}
+
+// cfree() is not expected to be called on Windows: any call trips the CHECK
+// below, whose condition is always false.
+void cfree(void *ptr) {
+  CHECK(!"cfree() should not be used on Windows?");
+}
+
+// Replacement for the CRT malloc(): allocates through the ASan allocator.
+void *malloc(size_t size) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_malloc(size, &stack);
+}
+
+// Debug-CRT variant of malloc(): everything except |size| is ignored.
+void* _malloc_dbg(size_t size, int , const char*, int) {
+  return malloc(size);
+}
+
+// Replacement for the CRT calloc(): allocates through the ASan allocator.
+void *calloc(size_t nmemb, size_t size) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_calloc(nmemb, size, &stack);
+}
+
+// Debug-CRT variant of calloc(): the trailing arguments are ignored.
+void* _calloc_dbg(size_t n, size_t size, int, const char*, int) {
+  return calloc(n, size);
+}
+
+// CRT-internal calloc entry point: forwards to calloc() and never writes to
+// |errno_tmp|.
+void *_calloc_impl(size_t nmemb, size_t size, int *errno_tmp) {
+  return calloc(nmemb, size);
+}
+
+// Replacement for the CRT realloc(): reallocates through the ASan allocator.
+void *realloc(void *ptr, size_t size) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_realloc(ptr, size, &stack);
+}
+
+// Debug-CRT variant of realloc(): not supported, any call trips the CHECK
+// below, whose condition is always false.
+void *_realloc_dbg(void *ptr, size_t size, int) {
+  CHECK(!"_realloc_dbg should not exist!");
+  return NULL;
+}
+
+// Defined further down in this file; declared here so that _recalloc() can
+// query the size of the existing block.
+size_t _msize(void *ptr);
+
+// Replacement for the CRT _recalloc(): resizes |p| to hold |n| elements of
+// |elem_size| bytes and zero-fills the part that was added. Returns NULL when
+// the byte count overflows size_t or the reallocation fails. A NULL |p|
+// behaves like calloc().
+void* _recalloc(void* p, size_t n, size_t elem_size) {
+  if (!p)
+    return calloc(n, elem_size);
+  const size_t size = n * elem_size;
+  // Refuse requests whose byte count does not fit in size_t.
+  if (elem_size != 0 && size / elem_size != n)
+    return NULL;
+  // NOTE(review): assumes _msize() reports the size previously requested for
+  // |p| -- confirm against asan_malloc_usable_size().
+  const size_t old_size = _msize(p);
+  void *new_p = realloc(p, size);
+  // realloc() leaves the grown tail uninitialized, while _recalloc() is
+  // documented to return it zero-initialized.
+  if (new_p && size > old_size)
+    REAL(memset)((char*)new_p + old_size, 0, size - old_size);
+  return new_p;
+}
+
+// Replacement for the CRT _msize(): returns whatever
+// asan_malloc_usable_size() reports for |ptr|.
+size_t _msize(void *ptr) {
+  GET_STACK_TRACE_HERE_FOR_MALLOC;
+  return asan_malloc_usable_size(ptr, &stack);
+}
+
+// Replacement for the debug-CRT report hook: all arguments are ignored.
+// NOTE(review): there is no return statement, so this relies on
+// ShowStatsAndAbort() never returning -- confirm that it is declared noreturn.
+int _CrtDbgReport(int, const char*, int,
+                  const char*, const char*, ...) {
+  ShowStatsAndAbort();
+}
+
+// Wide-character counterpart of _CrtDbgReport(): all arguments are ignored.
+// NOTE(review): there is no return statement, so this relies on
+// ShowStatsAndAbort() never returning -- confirm that it is declared noreturn.
+int _CrtDbgReportW(int reportType, const wchar_t*, int,
+                   const wchar_t*, const wchar_t*, ...) {
+  ShowStatsAndAbort();
+}
+
+// Replacement for the debug-CRT _CrtSetReportMode(): ignores both arguments
+// and always returns 0.
+int _CrtSetReportMode(int, int) {
+  return 0;
+}
+}  // extern "C"
+
+using __interception::GetRealFunctionAddress;
+
+// We don't want to include "windows.h" in this file to avoid extra attributes
+// set on malloc/free etc (e.g. dllimport), so declare a few things manually:
+extern "C" int __stdcall VirtualProtect(void* addr, size_t size,
+                                        DWORD prot, DWORD *old_prot);
+const int PAGE_EXECUTE_READWRITE = 0x40;
+
+namespace __asan {
+// When built against the DLL CRT (_DLL defined), overwrites the first bytes
+// of the CRT's malloc with a jump to the malloc defined in this file, so
+// that calls made inside the CRT DLL end up in ASan as well. Does nothing
+// otherwise. Only malloc is patched; 64-bit builds are rejected at compile
+// time.
+void ReplaceSystemMalloc() {
+#if defined(_DLL)
+# ifdef _WIN64
+#  error ReplaceSystemMalloc was not tested on x64
+# endif
+  char *crt_malloc;
+  if (GetRealFunctionAddress("malloc", (void**)&crt_malloc)) {
+    // Replace malloc in the CRT dll with a jump to our malloc.
+    DWORD old_prot, unused;
+    // Make the first 16 bytes of the CRT function writable.
+    CHECK(VirtualProtect(crt_malloc, 16, PAGE_EXECUTE_READWRITE, &old_prot));
+    REAL(memset)(crt_malloc, 0xCC /* int 3 */, 16);  // just in case.
+
+    // The jump offset is relative to the end of the 5-byte jmp instruction
+    // (1 opcode byte followed by the offset).
+    ptrdiff_t jmp_offset = (char*)malloc - (char*)crt_malloc - 5;
+    crt_malloc[0] = 0xE9;  // jmp, should be followed by an offset.
+    REAL(memcpy)(crt_malloc + 1, &jmp_offset, sizeof(jmp_offset));
+
+    // Restore the original page protection.
+    CHECK(VirtualProtect(crt_malloc, 16, old_prot, &unused));
+
+    // FYI: FlushInstructionCache is needed on Itanium etc but not on x86/x64.
+  }
+
+  // FIXME: investigate whether anything else is needed.
+#endif
+}
+}  // namespace __asan
+
+#endif  // _WIN32
diff --git a/lib/asan/asan_mapping.h b/lib/asan/asan_mapping.h
new file mode 100644
index 0000000..4824bf1
--- /dev/null
+++ b/lib/asan/asan_mapping.h
@@ -0,0 +1,112 @@
+//===-- asan_mapping.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Defines ASan memory mapping.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_MAPPING_H
+#define ASAN_MAPPING_H
+
+#include "asan_internal.h"
+
+// The full explanation of the memory mapping could be found here:
+// http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm
+
+#if ASAN_FLEXIBLE_MAPPING_AND_OFFSET == 1
+extern __attribute__((visibility("default"))) uintptr_t __asan_mapping_scale;
+extern __attribute__((visibility("default"))) uintptr_t __asan_mapping_offset;
+#define SHADOW_SCALE (__asan_mapping_scale)
+#define SHADOW_OFFSET (__asan_mapping_offset)
+#else
+#define SHADOW_SCALE (3)
+#if __WORDSIZE == 32
+#define SHADOW_OFFSET (1 << 29)
+#else
+#define SHADOW_OFFSET (1ULL << 44)
+#endif
+#endif  // ASAN_FLEXIBLE_MAPPING_AND_OFFSET
+
+#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
+#define MEM_TO_SHADOW(mem) (((mem) >> SHADOW_SCALE) | (SHADOW_OFFSET))
+
+#if __WORDSIZE == 64
+  static const size_t kHighMemEnd = 0x00007fffffffffffUL;
+#else  // __WORDSIZE == 32
+  static const size_t kHighMemEnd = 0xffffffff;
+#endif  // __WORDSIZE
+
+
+#define kLowMemBeg      0
+#define kLowMemEnd      (SHADOW_OFFSET ? SHADOW_OFFSET - 1 : 0)
+
+#define kLowShadowBeg   SHADOW_OFFSET
+#define kLowShadowEnd   MEM_TO_SHADOW(kLowMemEnd)
+
+#define kHighMemBeg     (MEM_TO_SHADOW(kHighMemEnd) + 1)
+
+#define kHighShadowBeg  MEM_TO_SHADOW(kHighMemBeg)
+#define kHighShadowEnd  MEM_TO_SHADOW(kHighMemEnd)
+
+#define kShadowGapBeg   (kLowShadowEnd ? kLowShadowEnd + 1 : 16 * kPageSize)
+#define kShadowGapEnd   (kHighShadowBeg - 1)
+
+#define kGlobalAndStackRedzone \
+      (SHADOW_GRANULARITY < 32 ? 32 : SHADOW_GRANULARITY)
+
+namespace __asan {
+
+// Returns true if |a| lies in low application memory, i.e. in
+// [kLowMemBeg, kLowMemEnd]. kLowMemEnd is an inclusive bound
+// (SHADOW_OFFSET - 1), so it has to be compared with <=. With a zero
+// SHADOW_OFFSET the macro yields the placeholder value 0 and the low memory
+// range is empty.
+static inline bool AddrIsInLowMem(uintptr_t a) {
+  return SHADOW_OFFSET != 0 && a <= kLowMemEnd;
+}
+
+// Returns true if |a| lies in [kLowShadowBeg, kLowShadowEnd], the shadow of
+// low application memory.
+static inline bool AddrIsInLowShadow(uintptr_t a) {
+  return a >= kLowShadowBeg && a <= kLowShadowEnd;
+}
+
+// Returns true if |a| lies in high application memory,
+// [kHighMemBeg, kHighMemEnd].
+static inline bool AddrIsInHighMem(uintptr_t a) {
+  return a >= kHighMemBeg && a <= kHighMemEnd;
+}
+
+// Returns true if |a| is an application address (low or high memory).
+static inline bool AddrIsInMem(uintptr_t a) {
+  return AddrIsInLowMem(a) || AddrIsInHighMem(a);
+}
+
+// Maps application address |p| to the address of its shadow byte. CHECKs
+// that |p| is an application address before applying MEM_TO_SHADOW.
+static inline uintptr_t MemToShadow(uintptr_t p) {
+  CHECK(AddrIsInMem(p));
+  return MEM_TO_SHADOW(p);
+}
+
+// Returns true if |a| lies in [kHighShadowBeg, kHighShadowEnd], the shadow of
+// high application memory. The upper bound must be kHighShadowEnd: high
+// application memory starts right after it (kHighMemBeg is
+// MEM_TO_SHADOW(kHighMemEnd) + 1), so bounding by kHighMemEnd would classify
+// every high-memory address as shadow.
+static inline bool AddrIsInHighShadow(uintptr_t a) {
+  return a >= kHighShadowBeg && a <= kHighShadowEnd;
+}
+
+// Returns true if |a| is a shadow address (low or high shadow).
+static inline bool AddrIsInShadow(uintptr_t a) {
+  return AddrIsInLowShadow(a) || AddrIsInHighShadow(a);
+}
+
+// Returns true if |a| is a multiple of SHADOW_GRANULARITY.
+static inline bool AddrIsAlignedByGranularity(uintptr_t a) {
+  return (a & (SHADOW_GRANULARITY - 1)) == 0;
+}
+
+// Returns true if a one-byte access at |a| touches poisoned memory according
+// to the shadow byte of |a|.
+static inline bool AddressIsPoisoned(uintptr_t a) {
+  const size_t kAccessSize = 1;
+  const int8_t shadow_value = *(uint8_t*)MemToShadow(a);
+  // A zero shadow byte: nothing in the granule is poisoned.
+  if (shadow_value == 0) return false;
+  // Otherwise compare the offset of the last accessed byte within the
+  // granule against the (signed) shadow value.
+  const uint8_t last_accessed_byte = (a & (SHADOW_GRANULARITY - 1))
+                                     + kAccessSize - 1;
+  return last_accessed_byte >= shadow_value;
+}
+
+}  // namespace __asan
+
+#endif  // ASAN_MAPPING_H
diff --git a/lib/asan/asan_new_delete.cc b/lib/asan/asan_new_delete.cc
new file mode 100644
index 0000000..74f8c4e
--- /dev/null
+++ b/lib/asan/asan_new_delete.cc
@@ -0,0 +1,55 @@
+//===-- asan_new_delete.cc --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Interceptors for operators new and delete.
+//===----------------------------------------------------------------------===//
+
+#include "asan_allocator.h"
+#include "asan_internal.h"
+#include "asan_stack.h"
+
+#include <new>
+
+namespace __asan {
+// This function is a no-op. We need it to make sure that object file
+// with our replacements will actually be loaded from static ASan
+// run-time library at link-time.
+// The body is intentionally empty.
+void ReplaceOperatorsNewAndDelete() { }
+}
+
+using namespace __asan;  // NOLINT
+
+// Shared body of every operator new replacement below: obtains a stack trace
+// and allocates |size| bytes with asan_memalign(), passing 0 as the alignment.
+// NOTE(review): presumably an alignment of 0 selects the allocator's default
+// alignment -- confirm against asan_memalign().
+#define OPERATOR_NEW_BODY \
+  GET_STACK_TRACE_HERE_FOR_MALLOC;\
+  return asan_memalign(0, size, &stack);
+
+// On Android only the plain forms are replaced, without exception
+// specifications; elsewhere the std::nothrow_t forms are replaced too.
+#ifdef ANDROID
+void *operator new(size_t size) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size) { OPERATOR_NEW_BODY; }
+#else
+void *operator new(size_t size) throw(std::bad_alloc) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size) throw(std::bad_alloc) { OPERATOR_NEW_BODY; }
+void *operator new(size_t size, std::nothrow_t const&) throw()
+{ OPERATOR_NEW_BODY; }
+void *operator new[](size_t size, std::nothrow_t const&) throw()
+{ OPERATOR_NEW_BODY; }
+#endif
+
+// Shared body of every operator delete replacement below: obtains a stack
+// trace and releases |ptr| through asan_free().
+#define OPERATOR_DELETE_BODY \
+  GET_STACK_TRACE_HERE_FOR_FREE(ptr);\
+  asan_free(ptr, &stack);
+
+// Plain, array and std::nothrow_t forms all share the same body.
+void operator delete(void *ptr) throw() { OPERATOR_DELETE_BODY; }
+void operator delete[](void *ptr) throw() { OPERATOR_DELETE_BODY; }
+void operator delete(void *ptr, std::nothrow_t const&) throw()
+{ OPERATOR_DELETE_BODY; }
+void operator delete[](void *ptr, std::nothrow_t const&) throw()
+{ OPERATOR_DELETE_BODY; }
diff --git a/lib/asan/asan_poisoning.cc b/lib/asan/asan_poisoning.cc
new file mode 100644
index 0000000..69ee34a
--- /dev/null
+++ b/lib/asan/asan_poisoning.cc
@@ -0,0 +1,151 @@
+//===-- asan_poisoning.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Shadow memory poisoning by ASan RTL and by user application.
+//===----------------------------------------------------------------------===//
+
+#include "asan_interceptors.h"
+#include "asan_interface.h"
+#include "asan_internal.h"
+#include "asan_mapping.h"
+
+namespace __asan {
+
+// Sets the shadow bytes that cover [addr, addr + size) to |value|.
+// Both |addr| and |addr + size| must be aligned by SHADOW_GRANULARITY, and
+// both must pass the application-address CHECK inside MemToShadow().
+void PoisonShadow(uintptr_t addr, size_t size, uint8_t value) {
+  CHECK(AddrIsAlignedByGranularity(addr));
+  CHECK(AddrIsAlignedByGranularity(addr + size));
+  uintptr_t shadow_beg = MemToShadow(addr);
+  uintptr_t shadow_end = MemToShadow(addr + size);
+  // The real memset must already have been resolved.
+  CHECK(REAL(memset) != NULL);
+  REAL(memset)((void*)shadow_beg, value, shadow_end - shadow_beg);
+}
+
+// Writes the shadow for the |redzone_size| bytes starting at |addr|, of which
+// only the first |size| bytes are addressable. |addr| must be aligned by
+// SHADOW_GRANULARITY.
+void PoisonShadowPartialRightRedzone(uintptr_t addr,
+                                     uintptr_t size,
+                                     uintptr_t redzone_size,
+                                     uint8_t value) {
+  CHECK(AddrIsAlignedByGranularity(addr));
+  uint8_t *shadow = (uint8_t*)MemToShadow(addr);
+  for (uintptr_t offset = 0; offset < redzone_size;
+       offset += SHADOW_GRANULARITY) {
+    uint8_t shadow_byte;
+    if (offset + SHADOW_GRANULARITY <= size) {
+      // The whole granule is addressable.
+      shadow_byte = 0;
+    } else if (offset >= size) {
+      // The whole granule is unaddressable.
+      shadow_byte = (SHADOW_GRANULARITY == 128) ? 0xff : value;
+    } else {
+      // Only the first size-offset bytes of the granule are addressable.
+      shadow_byte = size - offset;
+    }
+    *shadow++ = shadow_byte;
+  }
+}
+
+
+// Describes one end of an application memory range in shadow terms: the
+// shadow byte that covers the address, the offset of the address within its
+// granule, and the value the shadow byte held when the endpoint was built.
+struct ShadowSegmentEndpoint {
+  uint8_t *chunk;
+  int8_t offset;  // in [0, SHADOW_GRANULARITY)
+  int8_t value;  // = *chunk;
+
+  // Members are initialized in declaration order, so |chunk| is already set
+  // when |value| reads through it.
+  explicit ShadowSegmentEndpoint(uintptr_t address)
+      : chunk((uint8_t*)MemToShadow(address)),
+        offset(address & (SHADOW_GRANULARITY - 1)),
+        value(*chunk) {}
+};
+
+}  // namespace __asan
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+// Current implementation of __asan_(un)poison_memory_region doesn't check
+// that user program (un)poisons the memory it owns. It poisons memory
+// conservatively, and unpoisons progressively to make sure asan shadow
+// mapping invariant is preserved (see detailed mapping description here:
+// http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm).
+//
+// * if user asks to poison region [left, right), the program poisons
+// at least [left, AlignDown(right)).
+// * if user asks to unpoison region [left, right), the program unpoisons
+// at most [AlignDown(left), right).
+// Marks [addr, addr + size) as unaddressable in the shadow. Does nothing when
+// FLAG_allow_user_poisoning is off or |size| is 0. A granule that is only
+// partly covered by the region is poisoned only when that keeps the shadow
+// encoding valid (see the per-branch comments).
+void __asan_poison_memory_region(void const volatile *addr, size_t size) {
+  if (!FLAG_allow_user_poisoning || size == 0) return;
+  uintptr_t beg_addr = (uintptr_t)addr;
+  uintptr_t end_addr = beg_addr + size;
+  if (FLAG_v >= 1) {
+    Printf("Trying to poison memory region [%p, %p)\n", beg_addr, end_addr);
+  }
+  ShadowSegmentEndpoint beg(beg_addr);
+  ShadowSegmentEndpoint end(end_addr);
+  // Case 1: the whole region lies within a single shadow granule.
+  if (beg.chunk == end.chunk) {
+    CHECK(beg.offset < end.offset);
+    int8_t value = beg.value;
+    CHECK(value == end.value);
+    // We can only poison memory if the byte in end.offset is unaddressable.
+    // No need to re-poison memory if it is poisoned already.
+    if (value > 0 && value <= end.offset) {
+      if (beg.offset > 0) {
+        // Keep the bytes before beg.offset addressable.
+        *beg.chunk = Min(value, beg.offset);
+      } else {
+        *beg.chunk = kAsanUserPoisonedMemoryMagic;
+      }
+    }
+    return;
+  }
+  // Case 2: the region spans several granules.
+  CHECK(beg.chunk < end.chunk);
+  if (beg.offset > 0) {
+    // Mark bytes from beg.offset as unaddressable.
+    if (beg.value == 0) {
+      *beg.chunk = beg.offset;
+    } else {
+      *beg.chunk = Min(beg.value, beg.offset);
+    }
+    beg.chunk++;
+  }
+  // Granules fully inside the region are poisoned completely.
+  REAL(memset)(beg.chunk, kAsanUserPoisonedMemoryMagic, end.chunk - beg.chunk);
+  // Poison if byte in end.offset is unaddressable.
+  if (end.value > 0 && end.value <= end.offset) {
+    *end.chunk = kAsanUserPoisonedMemoryMagic;
+  }
+}
+
+// Marks [addr, addr + size) as addressable in the shadow. Does nothing when
+// FLAG_allow_user_poisoning is off or |size| is 0.
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size) {
+  if (!FLAG_allow_user_poisoning || size == 0) return;
+  uintptr_t beg_addr = (uintptr_t)addr;
+  uintptr_t end_addr = beg_addr + size;
+  if (FLAG_v >= 1) {
+    Printf("Trying to unpoison memory region [%p, %p)\n", beg_addr, end_addr);
+  }
+  ShadowSegmentEndpoint beg(beg_addr);
+  ShadowSegmentEndpoint end(end_addr);
+  // Case 1: the whole region lies within a single shadow granule.
+  if (beg.chunk == end.chunk) {
+    CHECK(beg.offset < end.offset);
+    int8_t value = beg.value;
+    CHECK(value == end.value);
+    // We unpoison memory bytes up to end.offset if they are not
+    // unpoisoned already.
+    if (value != 0) {
+      *beg.chunk = Max(value, end.offset);
+    }
+    return;
+  }
+  // Case 2: the region spans several granules.
+  CHECK(beg.chunk < end.chunk);
+  if (beg.offset > 0) {
+    // The first granule is unpoisoned as a whole.
+    *beg.chunk = 0;
+    beg.chunk++;
+  }
+  REAL(memset)(beg.chunk, 0, end.chunk - beg.chunk);
+  // In the last granule, make at least the first end.offset bytes
+  // addressable.
+  if (end.offset > 0 && end.value != 0) {
+    *end.chunk = Max(end.value, end.offset);
+  }
+}
+
+// Public wrapper around __asan::AddressIsPoisoned() for a one-byte access at
+// |addr|.
+bool __asan_address_is_poisoned(void const volatile *addr) {
+  return __asan::AddressIsPoisoned((uintptr_t)addr);
+}
diff --git a/lib/asan/asan_posix.cc b/lib/asan/asan_posix.cc
new file mode 100644
index 0000000..cd198bc
--- /dev/null
+++ b/lib/asan/asan_posix.cc
@@ -0,0 +1,189 @@
+//===-- asan_posix.cc -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Posix-specific details.
+//===----------------------------------------------------------------------===//
+#if defined(__linux__) || defined(__APPLE__)
+
+#include "asan_internal.h"
+#include "asan_interceptors.h"
+#include "asan_procmaps.h"
+#include "asan_stack.h"
+#include "asan_thread_registry.h"
+
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#ifdef ANDROID
+#include <sys/atomics.h>
+#endif
+
+// Should not add dependency on libstdc++,
+// since most of the stuff here is inlinable.
+#include <algorithm>
+
+static const size_t kAltStackSize = SIGSTKSZ * 4;  // SIGSTKSZ is not enough.
+
+namespace __asan {
+
+static void MaybeInstallSigaction(int signum,
+                                  void (*handler)(int, siginfo_t *, void *)) {
+  // Install 'handler' only for signals AsanInterceptsSignal() accepts.
+  if (AsanInterceptsSignal(signum)) {
+    struct sigaction sigact;
+    REAL(memset)(&sigact, 0, sizeof(sigact));
+    sigact.sa_flags = SA_SIGINFO | (FLAG_use_sigaltstack ? SA_ONSTACK : 0);
+    sigact.sa_sigaction = handler;
+    CHECK(0 == REAL(sigaction)(signum, &sigact, 0));
+  }
+}
+
+static void     ASAN_OnSIGSEGV(int, siginfo_t *siginfo, void *context) {
+  uintptr_t addr = (uintptr_t)siginfo->si_addr;  // Address from the siginfo.
+  // Emit the first message via raw AsanWrite() to fd 2; die if that fails.
+  if (13 != AsanWrite(2, "ASAN:SIGSEGV\n", 13)) AsanDie();
+  uintptr_t pc, sp, bp;
+  GetPcSpBp(context, &pc, &sp, &bp);  // Registers come from the context.
+  Report("ERROR: AddressSanitizer crashed on unknown address %p"
+         " (pc %p sp %p bp %p T%d)\n",
+         addr, pc, sp, bp,
+         asanThreadRegistry().GetCurrentTidOrMinusOne());
+  Printf("AddressSanitizer can not provide additional info. ABORTING\n");
+  GET_STACK_TRACE_WITH_PC_AND_BP(kStackTraceMax, pc, bp);
+  stack.PrintStack();  // 'stack' is presumably declared by the macro above.
+  ShowStatsAndAbort();
+}
+
+void SetAlternateSignalStack() {
+  stack_t altstack, oldstack;
+  CHECK(0 == sigaltstack(NULL, &oldstack));  // Query the current setting.
+  // If the alternate stack is already in place, do nothing.
+  if ((oldstack.ss_flags & SS_DISABLE) == 0) return;
+  // TODO(glider): the mapped stack should have the MAP_STACK flag in the
+  // future. It is not required by man 2 sigaltstack now (they're using
+  // malloc()).
+  void* base = AsanMmapSomewhereOrDie(kAltStackSize, __FUNCTION__);
+  altstack.ss_sp = base;
+  altstack.ss_flags = 0;
+  altstack.ss_size = kAltStackSize;
+  CHECK(0 == sigaltstack(&altstack, NULL));  // Install the new stack.
+  if (FLAG_v > 0) {
+    Report("Alternative stack for T%d set: [%p,%p)\n",
+           asanThreadRegistry().GetCurrentTidOrMinusOne(),
+           altstack.ss_sp, (char*)altstack.ss_sp + altstack.ss_size);
+  }
+}
+
+void UnsetAlternateSignalStack() {
+  stack_t altstack, oldstack;
+  altstack.ss_sp = NULL;           // Disable the alternate stack and fetch
+  altstack.ss_flags = SS_DISABLE;  // the previous setting into oldstack so
+  altstack.ss_size = 0;            // that its memory can be unmapped below.
+  CHECK(0 == sigaltstack(&altstack, &oldstack));
+  AsanUnmapOrDie(oldstack.ss_sp, oldstack.ss_size);
+}
+
+void InstallSignalHandlers() {
+  // Set the alternate signal stack for the main thread.
+  // This will cause SetAlternateSignalStack to be called twice, but the stack
+  // will be actually set only once.
+  if (FLAG_use_sigaltstack) SetAlternateSignalStack();
+  MaybeInstallSigaction(SIGSEGV, ASAN_OnSIGSEGV);
+  MaybeInstallSigaction(SIGBUS, ASAN_OnSIGSEGV);  // Same handler for SIGBUS.
+}
+
+void AsanDisableCoreDumper() {
+  // Zero both the soft and the hard RLIMIT_CORE limit (result unchecked).
+  struct rlimit nocore;
+  nocore.rlim_max = nocore.rlim_cur = 0;
+  setrlimit(RLIMIT_CORE, &nocore);
+}
+
+void AsanDumpProcessMap() {
+  // Print one "start-end<TAB>file" line for each entry AsanProcMaps yields.
+  const intptr_t kBufSize = 4095;
+  char filename[kBufSize];
+  uintptr_t start, end;
+  AsanProcMaps proc_maps;
+  Report("Process memory map follows:\n");
+  while (proc_maps.Next(&start, &end, NULL, filename, kBufSize)) {
+    Printf("\t%p-%p\t%s\n", (void*)start, (void*)end, filename);
+  }
+  Report("End of process memory map.\n");
+}
+
+int GetPid() {
+  return getpid();  // Id of the calling process.
+}
+
+uintptr_t GetThreadSelf() {
+  return (uintptr_t)pthread_self();  // pthread_t cast to an integer handle.
+}
+
+void SleepForSeconds(int seconds) {
+  sleep(seconds);  // The return value of sleep() is dropped.
+}
+
+void Exit(int exitcode) {
+  _exit(exitcode);  // _exit(): terminate at once, atexit handlers not run.
+}
+
+void Abort() {
+  abort();  // Abnormal termination via libc abort().
+}
+
+int Atexit(void (*function)(void)) {
+  return atexit(function);  // Result of atexit(): 0 means registered.
+}
+
+int AtomicInc(int *a) {
+#ifdef ANDROID
+  return __atomic_inc(a) + 1;  // presumably yields the old value - confirm
+#else
+  return __sync_add_and_fetch(a, 1);  // GCC builtin: returns the new value.
+#endif
+}
+
+uint16_t AtomicExchange(uint16_t *a, uint16_t new_val) {
+  return __sync_lock_test_and_set(a, new_val);  // Returns the previous *a.
+}
+
+void SortArray(uintptr_t *array, size_t size) {
+  std::sort(array, array + size);  // Ascending order, in place.
+}
+
+// ---------------------- TSD ---------------- {{{1
+
+static pthread_key_t tsd_key;
+static bool tsd_key_inited = false;
+void AsanTSDInit(void (*destructor)(void *tsd)) {
+  CHECK(!tsd_key_inited);  // May be called only once.
+  tsd_key_inited = true;
+  CHECK(0 == pthread_key_create(&tsd_key, destructor));
+}
+
+void *AsanTSDGet() {
+  CHECK(tsd_key_inited);  // AsanTSDInit() must have run first.
+  return pthread_getspecific(tsd_key);
+}
+
+void AsanTSDSet(void *tsd) {
+  CHECK(tsd_key_inited);  // AsanTSDInit() must have run first.
+  pthread_setspecific(tsd_key, tsd);  // NOTE(review): result is ignored.
+}
+
+}  // namespace __asan
+
+#endif  // __linux__ || __APPLE__
diff --git a/lib/asan/asan_printf.cc b/lib/asan/asan_printf.cc
new file mode 100644
index 0000000..4f08f54
--- /dev/null
+++ b/lib/asan/asan_printf.cc
@@ -0,0 +1,206 @@
+//===-- asan_printf.cc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Internal printf function, used inside ASan run-time library.
+// We can't use libc printf because we intercept some of the functions used
+// inside it.
+//===----------------------------------------------------------------------===//
+
+#include "asan_internal.h"
+#include "asan_interceptors.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+namespace __asan {
+
+extern char *error_message_buffer;
+extern size_t error_message_buffer_pos, error_message_buffer_size;
+
+void RawWrite(const char *buffer) {
+  static const char *kRawWriteError = "RawWrite can't output requested buffer!";
+  size_t length = (size_t)internal_strlen(buffer);
+  if (length != AsanWrite(2, buffer, length)) {  // Short write to fd 2: die.
+    AsanWrite(2, kRawWriteError, internal_strlen(kRawWriteError));
+    AsanDie();
+  }
+  if (error_message_buffer) {  // Also mirror the text into the buffer.
+    int remaining = error_message_buffer_size - error_message_buffer_pos;
+    internal_strncpy(error_message_buffer + error_message_buffer_pos,
+                     buffer, remaining);
+    error_message_buffer[error_message_buffer_size - 1] = '\0';
+    // FIXME: reallocate the buffer instead of truncating the message.
+    error_message_buffer_pos += remaining > length ? length : remaining;
+  }
+}
+
+static inline int AppendChar(char **buff, const char *buff_end, char c) {
+  // Store c only while there is room; the return value is always 1 so that
+  // callers can sum up the length the complete output would have had.
+  char *&cursor = *buff;
+  if (cursor < buff_end) *cursor++ = c;
+  return 1;
+}
+
+// Appends number in a given base (10 or 16) to buffer. If its length is less
+// than "minimal_num_length", it is padded with leading zeroes.
+static int AppendUnsigned(char **buff, const char *buff_end, uint64_t num,
+                          uint8_t base, uint8_t minimal_num_length) {
+  size_t const kMaxLen = 30;  // Capacity of the digit scratch buffer.
+  RAW_CHECK(base == 10 || base == 16);
+  RAW_CHECK(minimal_num_length < kMaxLen);
+  size_t num_buffer[kMaxLen];
+  size_t pos = 0;
+  do {  // Collect digits, least significant first.
+    RAW_CHECK_MSG(pos < kMaxLen, "appendNumber buffer overflow");
+    num_buffer[pos++] = num % base;
+    num /= base;
+  } while (num > 0);
+  while (pos < minimal_num_length) num_buffer[pos++] = 0;  // Zero padding.
+  int result = 0;
+  while (pos-- > 0) {  // Emit in reverse: most significant digit first.
+    size_t digit = num_buffer[pos];
+    result += AppendChar(buff, buff_end, (digit < 10) ? '0' + digit
+                                                      : 'a' + digit - 10);
+  }
+  return result;
+}
+
+static inline int AppendSignedDecimal(char **buff, const char *buff_end,
+                                      int64_t num) {
+  // Emits '-' (if negative) plus the magnitude; returns the length needed.
+  // The magnitude is computed in uint64_t because "-num" overflows (undefined
+  // behavior) when num is INT64_MIN.
+  uint64_t magnitude = (num < 0) ? 0 - (uint64_t)num : (uint64_t)num;
+  int result = (num < 0) ? AppendChar(buff, buff_end, '-') : 0;
+  result += AppendUnsigned(buff, buff_end, magnitude, 10, 0);
+  return result;
+}
+
+static inline int AppendString(char **buff, const char *buff_end,
+                               const char *s) {
+  // Copies s one character at a time (no libc helpers such as stpcpy) and
+  // returns its full length, even when the output buffer is too small.
+  RAW_CHECK(s);
+  int result = 0;
+  for (const char *p = s; *p; p++)
+    result += AppendChar(buff, buff_end, *p);
+  return result;
+}
+
+static inline int AppendPointer(char **buff, const char *buff_end,
+                                uint64_t ptr_value) {
+  // "0x" followed by hex digits zero-padded to 12 (64-bit) or 8 digits.
+  const uint8_t kMinDigits = (__WORDSIZE == 64) ? 12 : 8;
+  int result = AppendString(buff, buff_end, "0x");
+  result += AppendUnsigned(buff, buff_end, ptr_value, 16, kMinDigits);
+  return result;
+}
+
+static int VSNPrintf(char *buff, int buff_length,
+                     const char *format, va_list args) {
+  static const char *kPrintfFormatsHelp = "Supported Printf formats: "
+                                          "%%[z]{d,u,x}; %%p; %%s";
+  RAW_CHECK(format);
+  RAW_CHECK(buff_length > 0);
+  const char *buff_end = &buff[buff_length - 1];  // Last byte kept for '\0'.
+  const char *cur = format;
+  int result = 0;
+  for (; *cur; cur++) {
+    if (*cur == '%') {
+      cur++;
+      bool have_z = (*cur == 'z');  // Optional 'z' size modifier.
+      cur += have_z;
+      int64_t dval;
+      uint64_t uval;
+      switch (*cur) {
+        case 'd': dval = have_z ? va_arg(args, intptr_t)
+                                : va_arg(args, int);
+                  result += AppendSignedDecimal(&buff, buff_end, dval);
+                  break;
+        case 'u': uval = have_z ? va_arg(args, size_t)
+                                : va_arg(args, unsigned);
+                  result += AppendUnsigned(&buff, buff_end, uval, 10, 0);
+                  break;
+        case 'x': uval = have_z ? va_arg(args, size_t)
+                                : va_arg(args, unsigned);
+                  result += AppendUnsigned(&buff, buff_end, uval, 16, 0);
+                  break;
+        case 'p': RAW_CHECK_MSG(!have_z, kPrintfFormatsHelp);
+                  result += AppendPointer(&buff, buff_end,
+                                          va_arg(args, uintptr_t));
+                  break;
+        case 's': RAW_CHECK_MSG(!have_z, kPrintfFormatsHelp);
+                  result += AppendString(&buff, buff_end, va_arg(args, char*));
+                  break;
+        default:  RAW_CHECK_MSG(false, kPrintfFormatsHelp);
+      }
+    } else {
+      result += AppendChar(&buff, buff_end, *cur);
+    }
+  }
+  RAW_CHECK(buff <= buff_end);
+  AppendChar(&buff, buff_end + 1, '\0');  // Terminator, not counted in result.
+  return result;  // Length of the complete output, even if it was truncated.
+}
+
+void Printf(const char *format, ...) {
+  const int kLen = 1024 * 4;  // Longer output trips the RAW_CHECK_MSG below.
+  char buffer[kLen];
+  va_list args;
+  va_start(args, format);
+  int needed_length = VSNPrintf(buffer, kLen, format, args);
+  va_end(args);
+  RAW_CHECK_MSG(needed_length < kLen, "Buffer in Printf is too short!\n");
+  RawWrite(buffer);  // Formatted text goes out through RawWrite().
+}
+
+// Writes at most "length" symbols to "buffer" (including trailing '\0').
+// Returns the number of symbols that should have been written to buffer
+// (not including trailing '\0'). Thus, the string is truncated
+// iff return value is not less than "length".
+int SNPrintf(char *buffer, size_t length, const char *format, ...) {
+  va_list varargs;
+  va_start(varargs, format);
+  const int total = VSNPrintf(buffer, length, format, varargs);
+  va_end(varargs);
+  return total;
+}
+
+// Like Printf, but prints the current PID before the output string.
+void Report(const char *format, ...) {
+  const int kLen = 1024 * 4;
+  char buffer[kLen];  // Holds the "==PID== " prefix followed by the message.
+  int needed_length = SNPrintf(buffer, kLen, "==%d== ", GetPid());
+  RAW_CHECK_MSG(needed_length < kLen, "Buffer in Report is too short!\n");
+  va_list args;
+  va_start(args, format);
+  needed_length += VSNPrintf(buffer + needed_length, kLen - needed_length,
+                             format, args);
+  va_end(args);
+  RAW_CHECK_MSG(needed_length < kLen, "Buffer in Report is too short!\n");
+  RawWrite(buffer);  // Prefix and message are written in one call.
+}
+
+int SScanf(const char *str, const char *format, ...) {
+#ifndef _WIN32
+  va_list args;
+  va_start(args, format);
+  int res = vsscanf(str, format, args);  // Parsing is delegated to libc.
+  va_end(args);
+  return res;
+#else
+  UNIMPLEMENTED();  // No implementation when _WIN32 is defined.
+  return -1;
+#endif
+}
+
+}  // namespace __asan
diff --git a/lib/asan/asan_procmaps.h b/lib/asan/asan_procmaps.h
new file mode 100644
index 0000000..5ae5fb2
--- /dev/null
+++ b/lib/asan/asan_procmaps.h
@@ -0,0 +1,71 @@
+//===-- asan_procmaps.h -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Information about the process mappings.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_PROCMAPS_H
+#define ASAN_PROCMAPS_H
+
+#include "asan_internal.h"
+
+namespace __asan {
+
+class AsanProcMaps {
+ public:
+  AsanProcMaps();
+  bool Next(uintptr_t *start, uintptr_t *end, uintptr_t *offset,
+            char filename[], size_t filename_size);  // Used as loop condition.
+  void Reset();
+  // Gets the object file name and the offset in that object for a given
+  // address 'addr'. Returns true on success.
+  bool GetObjectNameAndOffset(uintptr_t addr, uintptr_t *offset,
+                              char filename[], size_t filename_size);
+  ~AsanProcMaps();
+ private:
+  // Default implementation of GetObjectNameAndOffset.
+  // Quite slow, because it iterates through the whole process map for each
+  // lookup.
+  bool IterateForObjectNameAndOffset(uintptr_t addr, uintptr_t *offset,
+                                     char filename[], size_t filename_size) {
+    Reset();  // Presumably rewinds Next() to the first entry.
+    uintptr_t start, end, file_offset;
+    for (int i = 0; Next(&start, &end, &file_offset, filename, filename_size);
+         i++) {
+      if (addr >= start && addr < end) {
+        // Don't subtract 'start' for the first entry. Don't ask me why.
+        *offset = (addr - (i ? start : 0)) + file_offset;
+        return true;
+      }
+    }
+    if (filename_size)  // Address not found: hand back an empty name.
+      filename[0] = '\0';
+    return false;
+  }
+
+#if defined __linux__  // Per-platform iteration state follows.
+  char *proc_self_maps_buff_;
+  size_t proc_self_maps_buff_mmaped_size_;
+  size_t proc_self_maps_buff_len_;
+  char *current_;
+#elif defined __APPLE__
+  template<uint32_t kLCSegment, typename SegmentCommand>
+  bool NextSegmentLoad(uintptr_t *start, uintptr_t *end, uintptr_t *offset,
+                       char filename[], size_t filename_size);
+  int current_image_;
+  uint32_t current_magic_;
+  int current_load_cmd_count_;
+  char *current_load_cmd_addr_;
+#endif
+};
+
+}  // namespace __asan
+
+#endif  // ASAN_PROCMAPS_H
diff --git a/lib/asan/asan_rtl.cc b/lib/asan/asan_rtl.cc
new file mode 100644
index 0000000..366e109
--- /dev/null
+++ b/lib/asan/asan_rtl.cc
@@ -0,0 +1,561 @@
+//===-- asan_rtl.cc ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Main file of the ASan run-time library.
+//===----------------------------------------------------------------------===//
+#include "asan_allocator.h"
+#include "asan_interceptors.h"
+#include "asan_interface.h"
+#include "asan_internal.h"
+#include "asan_lock.h"
+#include "asan_mapping.h"
+#include "asan_stack.h"
+#include "asan_stats.h"
+#include "asan_thread.h"
+#include "asan_thread_registry.h"
+
+namespace __asan {
+
+// -------------------------- Flags ------------------------- {{{1
+static const size_t kMallocContextSize = 30;
+static int    FLAG_atexit;
+
+size_t FLAG_redzone;  // power of two, >= 32
+size_t FLAG_quarantine_size;
+int    FLAG_demangle;
+bool   FLAG_symbolize;
+int    FLAG_v;
+int    FLAG_debug;
+bool   FLAG_poison_shadow;
+int    FLAG_report_globals;
+size_t FLAG_malloc_context_size = kMallocContextSize;
+uintptr_t FLAG_large_malloc;
+bool   FLAG_handle_segv;
+bool   FLAG_use_sigaltstack;
+bool   FLAG_replace_str;
+bool   FLAG_replace_intrin;
+bool   FLAG_replace_cfallocator;  // Used on Mac only.
+size_t FLAG_max_malloc_fill_size = 0;
+bool   FLAG_use_fake_stack;
+bool   FLAG_abort_on_error;
+int    FLAG_exitcode = ASAN_DEFAULT_FAILURE_EXITCODE;
+bool   FLAG_allow_user_poisoning;
+int    FLAG_sleep_before_dying;
+bool   FLAG_unmap_shadow_on_exit;
+bool   FLAG_disable_core;
+
+// -------------------------- Globals --------------------- {{{1
+int asan_inited;
+bool asan_init_is_running;
+static void (*death_callback)(void);
+static void (*error_report_callback)(const char*);
+char *error_message_buffer = NULL;
+size_t error_message_buffer_pos = 0;
+size_t error_message_buffer_size = 0;
+
+// -------------------------- Misc ---------------- {{{1
+void ShowStatsAndAbort() {
+  __asan_print_accumulated_stats();
+  AsanDie();  // Exits or aborts depending on FLAG_abort_on_error.
+}
+
+static void PrintBytes(const char *before, uintptr_t *a) {
+  // Dumps the machine word at 'a' as space-separated hex bytes on one line.
+  const uint8_t *bytes = (const uint8_t*)a;
+  Printf("%s%p:", before, (void*)a);
+  for (size_t i = 0; i < (__WORDSIZE) / 8; i++) {
+    Printf(" %x%x", bytes[i] >> 4, bytes[i] & 15);
+  }
+  Printf("\n");
+}
+
+size_t ReadFileToBuffer(const char *file_name, char **buff,
+                         size_t *buff_size, size_t max_len) {
+  const size_t kMinFileLen = kPageSize;
+  size_t read_len = 0;
+  *buff = 0;
+  *buff_size = 0;
+  // The files we usually open are not seekable, so try different buffer sizes.
+  for (size_t size = kMinFileLen; size <= max_len; size *= 2) {
+    int fd = AsanOpenReadonly(file_name);
+    if (fd < 0) return 0;  // Open failed.
+    AsanUnmapOrDie(*buff, *buff_size);  // Drop the previous attempt's buffer.
+    *buff = (char*)AsanMmapSomewhereOrDie(size, __FUNCTION__);
+    *buff_size = size;
+    // Read a page at a time. NOTE(review): is a negative AsanRead() handled?
+    read_len = 0;
+    bool reached_eof = false;
+    while (read_len + kPageSize <= size) {
+      size_t just_read = AsanRead(fd, *buff + read_len, kPageSize);
+      if (just_read == 0) {
+        reached_eof = true;
+        break;
+      }
+      read_len += just_read;
+    }
+    AsanClose(fd);
+    if (reached_eof)  // We've read the whole file.
+      break;
+  }
+  return read_len;  // Bytes read in the last attempt.
+}
+
+void AsanDie() {
+  static int num_calls = 0;
+  if (AtomicInc(&num_calls) > 1) {
+    // Don't die twice - run a busy loop.
+    while (1) { }
+  }
+  if (FLAG_sleep_before_dying) {
+    Report("Sleeping for %d second(s)\n", FLAG_sleep_before_dying);
+    SleepForSeconds(FLAG_sleep_before_dying);
+  }
+  if (FLAG_unmap_shadow_on_exit)
+    AsanUnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
+  if (death_callback)  // Set through __asan_set_death_callback().
+    death_callback();
+  if (FLAG_abort_on_error)
+    Abort();
+  Exit(FLAG_exitcode);  // Otherwise exit with the configured exit code.
+}
+
+// ---------------------- mmap -------------------- {{{1
+void OutOfMemoryMessageAndDie(const char *mem_type, size_t size) {
+  Report("ERROR: AddressSanitizer failed to allocate "
+         "0x%zx (%zd) bytes of %s\n",
+         size, size, mem_type);  // Size is printed in hex and in decimal.
+  PRINT_CURRENT_STACK();
+  ShowStatsAndAbort();
+}
+
+// Reserve memory range [beg, end]; beg and end + 1 must be page-aligned.
+static void ReserveShadowMemoryRange(uintptr_t beg, uintptr_t end) {
+  CHECK((beg % kPageSize) == 0);
+  CHECK(((end + 1) % kPageSize) == 0);
+  size_t size = end - beg + 1;  // 'end' is inclusive.
+  void *res = AsanMmapFixedNoReserve(beg, size);
+  CHECK(res == (void*)beg && "ReserveShadowMemoryRange failed");
+}
+
+// ---------------------- LowLevelAllocator ------------- {{{1
+void *LowLevelAllocator::Allocate(size_t size) {
+  CHECK((size & (size - 1)) == 0 && "size must be a power of two");
+  if (allocated_end_ - allocated_current_ < size) {  // Need a new mapping.
+    size_t size_to_allocate = Max(size, kPageSize);
+    allocated_current_ =
+        (char*)AsanMmapSomewhereOrDie(size_to_allocate, __FUNCTION__);
+    allocated_end_ = allocated_current_ + size_to_allocate;
+    PoisonShadow((uintptr_t)allocated_current_, size_to_allocate,
+                 kAsanInternalHeapMagic);
+  }
+  CHECK(allocated_end_ - allocated_current_ >= size);
+  void *res = allocated_current_;  // Hand out the front of the region ...
+  allocated_current_ += size;      // ... and advance past it.
+  return res;
+}
+
+// ---------------------- DescribeAddress -------------------- {{{1
+static bool DescribeStackAddress(uintptr_t addr, uintptr_t access_size) {
+  AsanThread *t = asanThreadRegistry().FindThreadByStackAddress(addr);
+  if (!t) return false;  // No thread's stack was found for addr.
+  const intptr_t kBufSize = 4095;
+  char buf[kBufSize];
+  uintptr_t offset = 0;
+  const char *frame_descr = t->GetFrameNameByAddr(addr, &offset);
+  // This string is created by the compiler and has the following form:
+  // "FunctionName n alloc_1 alloc_2 ... alloc_n"
+  // where alloc_i looks like "offset size len ObjectName ".
+  CHECK(frame_descr);
+  // Report the function name and the offset.
+  const char *name_end = internal_strchr(frame_descr, ' ');
+  CHECK(name_end);
+  buf[0] = 0;
+  // Keep room for the '\0' that a strncat-style append adds after n chars.
+  intptr_t name_len = name_end - frame_descr;
+  internal_strncat(buf, frame_descr, Min(kBufSize - 1, name_len));
+  Printf("Address %p is located at offset %zu "
+         "in frame <%s> of T%d's stack:\n",
+         addr, offset, buf, t->tid());
+  // Report the number of stack objects.
+  char *p;
+  size_t n_objects = internal_simple_strtoll(name_end, &p, 10);
+  CHECK(n_objects > 0);
+  Printf("  This frame has %zu object(s):\n", n_objects);
+  // Report all objects in this frame.
+  for (size_t i = 0; i < n_objects; i++) {
+    size_t beg, size;
+    intptr_t len;
+    beg  = internal_simple_strtoll(p, &p, 10);
+    size = internal_simple_strtoll(p, &p, 10);
+    len  = internal_simple_strtoll(p, &p, 10);
+    if (beg <= 0 || size <= 0 || len < 0 || *p != ' ') {
+      Printf("AddressSanitizer can't parse the stack frame descriptor: |%s|\n",
+             frame_descr);
+      break;
+    }
+    p++;
+    buf[0] = 0;
+    internal_strncat(buf, p, Min(kBufSize - 1, len));  // Room for the '\0'.
+    p += len;
+    Printf("    [%zu, %zu) '%s'\n", beg, beg + size, buf);
+  }
+  Printf("HINT: this may be a false positive if your program uses "
+         "some custom stack unwind mechanism\n"
+         "      (longjmp and C++ exceptions *are* supported)\n");
+  t->summary()->Announce();
+  return true;
+}
+
+static NOINLINE void DescribeAddress(uintptr_t addr, uintptr_t access_size) {
+  // Try the describers in order: global, then stack, then heap. The later
+  // ones run only if the earlier ones did not recognize the address.
+  const bool described =
+      DescribeAddrIfGlobal(addr) || DescribeStackAddress(addr, access_size);
+
+  if (!described) {
+    // Neither a global nor a stack address: describe it as a heap address.
+    DescribeHeapAddress(addr, access_size);
+  }
+}
+
+// -------------------------- Run-time entry ------------------- {{{1
+// Exported __asan_report_{load,store}{1,2,4,8,16} entry points.
+#define ASAN_REPORT_ERROR(type, is_write, size)                     \
+extern "C" NOINLINE ASAN_INTERFACE_ATTRIBUTE                        \
+void __asan_report_ ## type ## size(uintptr_t addr);                \
+void __asan_report_ ## type ## size(uintptr_t addr) {               \
+  GET_CALLER_PC_BP_SP;                                              \
+  __asan_report_error(pc, bp, sp, addr, is_write, size);            \
+}
+
+ASAN_REPORT_ERROR(load, false, 1)
+ASAN_REPORT_ERROR(load, false, 2)
+ASAN_REPORT_ERROR(load, false, 4)
+ASAN_REPORT_ERROR(load, false, 8)
+ASAN_REPORT_ERROR(load, false, 16)
+ASAN_REPORT_ERROR(store, true, 1)
+ASAN_REPORT_ERROR(store, true, 2)
+ASAN_REPORT_ERROR(store, true, 4)
+ASAN_REPORT_ERROR(store, true, 8)
+ASAN_REPORT_ERROR(store, true, 16)
+
+// Force the linker to keep the symbols for various ASan interface functions.
+// We want to keep those in the executable in order to let the instrumented
+// dynamic libraries access the symbol even if it is not used by the executable
+// itself. This should help if the build system is removing dead code at link
+// time.
+static NOINLINE void force_interface_symbols() {
+  volatile int fake_condition = 0;  // prevent dead condition elimination.
+  if (fake_condition) {  // Never true at run time: nothing below executes.
+    __asan_report_load1(0);
+    __asan_report_load2(0);
+    __asan_report_load4(0);
+    __asan_report_load8(0);
+    __asan_report_load16(0);
+    __asan_report_store1(0);
+    __asan_report_store2(0);
+    __asan_report_store4(0);
+    __asan_report_store8(0);
+    __asan_report_store16(0);
+    __asan_register_global(0, 0, NULL);
+    __asan_register_globals(NULL, 0);
+    __asan_unregister_globals(NULL, 0);
+    __asan_set_death_callback(NULL);
+    __asan_set_error_report_callback(NULL);
+    __asan_handle_no_return();
+  }
+}
+
+// -------------------------- Init ------------------- {{{1
+static int64_t IntFlagValue(const char *flags, const char *flag,
+                            int64_t default_val) {
+  // Look 'flag' up as a substring of 'flags'; NULL flags means "not found".
+  const char *str = flags ? internal_strstr(flags, flag) : NULL;
+  if (!str) return default_val;
+  return internal_atoll(str + internal_strlen(flag));
+}
+
+static void asan_atexit() {  // Registered via Atexit() when FLAG_atexit is set.
+  Printf("AddressSanitizer exit stats:\n");
+  __asan_print_accumulated_stats();
+}
+
+void CheckFailed(const char *cond, const char *file, int line) {
+  Report("CHECK failed: %s at %s:%d\n", cond, file, line);
+  PRINT_CURRENT_STACK();
+  ShowStatsAndAbort();  // Prints the stats, then calls AsanDie().
+}
+
+}  // namespace __asan
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+int __asan_set_error_exit_code(int exit_code) {
+  const int previous = FLAG_exitcode;  // Returned so callers can restore it.
+  FLAG_exitcode = exit_code;
+  return previous;
+}
+
+void NOINLINE __asan_handle_no_return() {
+  int local_stack;  // Only its address is used, as a current-stack marker.
+  AsanThread *curr_thread = asanThreadRegistry().GetCurrent();
+  CHECK(curr_thread);
+  uintptr_t top = curr_thread->stack_top();
+  uintptr_t bottom = ((uintptr_t)&local_stack - kPageSize) & ~(kPageSize-1);
+  PoisonShadow(bottom, top - bottom, 0);  // Shadow value 0 for [bottom, top).
+}
+
+void NOINLINE __asan_set_death_callback(void (*callback)(void)) {
+  death_callback = callback;  // Invoked from AsanDie() when non-NULL.
+}
+
+void NOINLINE __asan_set_error_report_callback(void (*callback)(const char*)) {
+  error_report_callback = callback;
+  if (callback) {  // NOTE(review): repeated calls re-map; old buffer leaks?
+    error_message_buffer_size = 1 << 14;
+    error_message_buffer =
+        (char*)AsanMmapSomewhereOrDie(error_message_buffer_size, __FUNCTION__);
+    error_message_buffer_pos = 0;  // Start filling the new buffer at 0.
+  }
+}
+
+void __asan_report_error(uintptr_t pc, uintptr_t bp, uintptr_t sp,
+                         uintptr_t addr, bool is_write, size_t access_size) {
+  // Do not print more than one report, otherwise they will mix up.
+  static int num_calls = 0;
+  if (AtomicInc(&num_calls) > 1) return;
+
+  Printf("=================================================================\n");
+  const char *bug_descr = "unknown-crash";
+  if (AddrIsInMem(addr)) {
+    uint8_t *shadow_addr = (uint8_t*)MemToShadow(addr);
+    // If we are accessing 16 bytes, look at the second shadow byte.
+    if (*shadow_addr == 0 && access_size > SHADOW_GRANULARITY)
+      shadow_addr++;
+    // If we are in the partial right redzone, look at the next shadow byte.
+    if (*shadow_addr > 0 && *shadow_addr < 128)
+      shadow_addr++;
+    switch (*shadow_addr) {  // Map the shadow magic value to a bug name.
+      case kAsanHeapLeftRedzoneMagic:
+      case kAsanHeapRightRedzoneMagic:
+        bug_descr = "heap-buffer-overflow";
+        break;
+      case kAsanHeapFreeMagic:
+        bug_descr = "heap-use-after-free";
+        break;
+      case kAsanStackLeftRedzoneMagic:
+        bug_descr = "stack-buffer-underflow";
+        break;
+      case kAsanStackMidRedzoneMagic:
+      case kAsanStackRightRedzoneMagic:
+      case kAsanStackPartialRedzoneMagic:
+        bug_descr = "stack-buffer-overflow";
+        break;
+      case kAsanStackAfterReturnMagic:
+        bug_descr = "stack-use-after-return";
+        break;
+      case kAsanUserPoisonedMemoryMagic:
+        bug_descr = "use-after-poison";
+        break;
+      case kAsanGlobalRedzoneMagic:
+        bug_descr = "global-buffer-overflow";
+        break;
+    }
+  }
+
+  AsanThread *curr_thread = asanThreadRegistry().GetCurrent();
+  int curr_tid = asanThreadRegistry().GetCurrentTidOrMinusOne();
+
+  if (curr_thread) {
+    // We started reporting an error message. Stop using the fake stack
+    // in case we will call an instrumented function from a symbolizer.
+    curr_thread->fake_stack().StopUsingFakeStack();
+  }
+
+  Report("ERROR: AddressSanitizer %s on address "
+         "%p at pc 0x%zx bp 0x%zx sp 0x%zx\n",
+         bug_descr, addr, pc, bp, sp);
+
+  Printf("%s of size %zu at %p thread T%d\n",
+         access_size ? (is_write ? "WRITE" : "READ") : "ACCESS",
+         access_size, addr, curr_tid);
+
+  if (FLAG_debug) {
+    PrintBytes("PC: ", (uintptr_t*)pc);
+  }
+
+  GET_STACK_TRACE_WITH_PC_AND_BP(kStackTraceMax, pc, bp);
+  stack.PrintStack();
+
+  CHECK(AddrIsInMem(addr));  // NOTE(review): checked only after printing.
+
+  DescribeAddress(addr, access_size);
+
+  uintptr_t shadow_addr = MemToShadow(addr);
+  Report("ABORTING\n");
+  __asan_print_accumulated_stats();
+  Printf("Shadow byte and word:\n");
+  Printf("  %p: %x\n", shadow_addr, *(unsigned char*)shadow_addr);
+  uintptr_t aligned_shadow = shadow_addr & ~(kWordSize - 1);
+  PrintBytes("  ", (uintptr_t*)(aligned_shadow));
+  Printf("More shadow bytes:\n");
+  PrintBytes("  ", (uintptr_t*)(aligned_shadow-4*kWordSize));
+  PrintBytes("  ", (uintptr_t*)(aligned_shadow-3*kWordSize));
+  PrintBytes("  ", (uintptr_t*)(aligned_shadow-2*kWordSize));
+  PrintBytes("  ", (uintptr_t*)(aligned_shadow-1*kWordSize));
+  PrintBytes("=>", (uintptr_t*)(aligned_shadow+0*kWordSize));
+  PrintBytes("  ", (uintptr_t*)(aligned_shadow+1*kWordSize));
+  PrintBytes("  ", (uintptr_t*)(aligned_shadow+2*kWordSize));
+  PrintBytes("  ", (uintptr_t*)(aligned_shadow+3*kWordSize));
+  PrintBytes("  ", (uintptr_t*)(aligned_shadow+4*kWordSize));
+  if (error_report_callback) {  // Pass on the text mirrored by RawWrite().
+    error_report_callback(error_message_buffer);
+  }
+  AsanDie();
+}
+
+void __asan_init() {  // sets up flags, interceptors, shadow and threads.
+  if (asan_inited) return;  // already initialized.
+  asan_init_is_running = true;  // cleared again just before thread setup.
+
+  // Make sure we are not statically linked.
+  AsanDoesNotSupportStaticLinkage();
+
+  // flags: read from ASAN_OPTIONS; the last argument is the default value.
+  const char *options = AsanGetEnv("ASAN_OPTIONS");
+  FLAG_malloc_context_size =
+      IntFlagValue(options, "malloc_context_size=", kMallocContextSize);
+  CHECK(FLAG_malloc_context_size <= kMallocContextSize);  // can only shrink.
+
+  FLAG_max_malloc_fill_size =
+      IntFlagValue(options, "max_malloc_fill_size=", 0);
+
+  FLAG_v = IntFlagValue(options, "verbosity=", 0);
+
+  FLAG_redzone = IntFlagValue(options, "redzone=",
+      (ASAN_LOW_MEMORY) ? 64 : 128);
+  CHECK(FLAG_redzone >= 32);
+  CHECK((FLAG_redzone & (FLAG_redzone - 1)) == 0);  // must be a power of two.
+
+  FLAG_atexit = IntFlagValue(options, "atexit=", 0);
+  FLAG_poison_shadow = IntFlagValue(options, "poison_shadow=", 1);
+  FLAG_report_globals = IntFlagValue(options, "report_globals=", 1);
+  FLAG_handle_segv = IntFlagValue(options, "handle_segv=", ASAN_NEEDS_SEGV);
+  FLAG_use_sigaltstack = IntFlagValue(options, "use_sigaltstack=", 0);
+  FLAG_symbolize = IntFlagValue(options, "symbolize=", 1);
+  FLAG_demangle = IntFlagValue(options, "demangle=", 1);
+  FLAG_debug = IntFlagValue(options, "debug=", 0);
+  FLAG_replace_cfallocator = IntFlagValue(options, "replace_cfallocator=", 1);
+  FLAG_replace_str = IntFlagValue(options, "replace_str=", 1);
+  FLAG_replace_intrin = IntFlagValue(options, "replace_intrin=", 1);
+  FLAG_use_fake_stack = IntFlagValue(options, "use_fake_stack=", 1);
+  FLAG_exitcode = IntFlagValue(options, "exitcode=",
+                               ASAN_DEFAULT_FAILURE_EXITCODE);
+  FLAG_allow_user_poisoning = IntFlagValue(options,
+                                           "allow_user_poisoning=", 1);
+  FLAG_sleep_before_dying = IntFlagValue(options, "sleep_before_dying=", 0);
+  FLAG_abort_on_error = IntFlagValue(options, "abort_on_error=", 0);
+  FLAG_unmap_shadow_on_exit = IntFlagValue(options, "unmap_shadow_on_exit=", 0);
+  // By default, disable core dumper on 64-bit --
+  // it makes little sense to dump 16T+ core.
+  FLAG_disable_core = IntFlagValue(options, "disable_core=", __WORDSIZE == 64);
+
+  FLAG_quarantine_size = IntFlagValue(options, "quarantine_size=",
+      (ASAN_LOW_MEMORY) ? 1UL << 24 : 1UL << 28);
+
+  if (FLAG_v) {
+    Report("Parsed ASAN_OPTIONS: %s\n", options);
+  }
+
+  if (FLAG_atexit) {
+    Atexit(asan_atexit);
+  }
+
+  // interceptors
+  InitializeAsanInterceptors();
+
+  ReplaceSystemMalloc();
+  ReplaceOperatorsNewAndDelete();
+
+  if (FLAG_v) {
+    Printf("|| `[%p, %p]` || HighMem    ||\n", kHighMemBeg, kHighMemEnd);
+    Printf("|| `[%p, %p]` || HighShadow ||\n",
+           kHighShadowBeg, kHighShadowEnd);
+    Printf("|| `[%p, %p]` || ShadowGap  ||\n",
+           kShadowGapBeg, kShadowGapEnd);
+    Printf("|| `[%p, %p]` || LowShadow  ||\n",
+           kLowShadowBeg, kLowShadowEnd);
+    Printf("|| `[%p, %p]` || LowMem     ||\n", kLowMemBeg, kLowMemEnd);
+    Printf("MemToShadow(shadow): %p %p %p %p\n",
+           MEM_TO_SHADOW(kLowShadowBeg),
+           MEM_TO_SHADOW(kLowShadowEnd),
+           MEM_TO_SHADOW(kHighShadowBeg),
+           MEM_TO_SHADOW(kHighShadowEnd));
+    Printf("red_zone=%zu\n", (size_t)FLAG_redzone);
+    Printf("malloc_context_size=%zu\n", (size_t)FLAG_malloc_context_size);
+
+    Printf("SHADOW_SCALE: %zx\n", (size_t)SHADOW_SCALE);
+    Printf("SHADOW_GRANULARITY: %zx\n", (size_t)SHADOW_GRANULARITY);
+    Printf("SHADOW_OFFSET: %zx\n", (size_t)SHADOW_OFFSET);
+    CHECK(SHADOW_SCALE >= 3 && SHADOW_SCALE <= 7);  // verbose mode only.
+  }
+
+  if (FLAG_disable_core) {
+    AsanDisableCoreDumper();
+  }
+
+  if (AsanShadowRangeIsAvailable()) {
+    if (kLowShadowBeg != kLowShadowEnd) {
+      // mmap the low shadow plus at least one page.
+      ReserveShadowMemoryRange(kLowShadowBeg - kMmapGranularity, kLowShadowEnd);
+    }
+    // mmap the high shadow.
+    ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd);
+    // protect the gap
+    void *prot = AsanMprotect(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1);
+    CHECK(prot == (void*)kShadowGapBeg);  // must land exactly on the gap.
+  } else {
+    Report("Shadow memory range interleaves with an existing memory mapping. "
+           "ASan cannot proceed correctly. ABORTING.\n");
+    AsanDumpProcessMap();
+    AsanDie();
+  }
+
+  InstallSignalHandlers();
+
+  // On Linux AsanThread::ThreadStart() calls malloc() that's why asan_inited
+  // should be set to 1 prior to initializing the threads.
+  asan_inited = 1;
+  asan_init_is_running = false;
+
+  asanThreadRegistry().Init();  // registers the main thread.
+  asanThreadRegistry().GetMain()->ThreadStart();
+  force_interface_symbols();  // no-op.
+
+  if (FLAG_v) {
+    Report("AddressSanitizer Init done\n");
+  }
+}
+
+#if defined(ASAN_USE_PREINIT_ARRAY)
+  // On Linux, we force __asan_init to be called before anyone else
+  // by placing it into .preinit_array section.
+  // FIXME: do we have anything like this on Mac?
+  __attribute__((section(".preinit_array")))
+    typeof(__asan_init) *__asan_preinit =__asan_init;
+#elif defined(_WIN32) && defined(_DLL)
+  // On Windows, when using dynamic CRT (/MD), we can put a pointer
+  // to __asan_init into the global list of C initializers.
+  // See crt0dat.c in the CRT sources for the details.
+  #pragma section(".CRT$XIB", long, read)  // NOLINT
+  __declspec(allocate(".CRT$XIB")) void (*__asan_preinit)() = __asan_init;
+#endif
diff --git a/lib/asan/asan_stack.cc b/lib/asan/asan_stack.cc
new file mode 100644
index 0000000..a8766bb
--- /dev/null
+++ b/lib/asan/asan_stack.cc
@@ -0,0 +1,190 @@
+//===-- asan_stack.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Code for ASan stack trace.
+//===----------------------------------------------------------------------===//
+#include "asan_interceptors.h"
+#include "asan_lock.h"
+#include "asan_procmaps.h"
+#include "asan_stack.h"
+#include "asan_thread.h"
+#include "asan_thread_registry.h"
+
+#ifdef ASAN_USE_EXTERNAL_SYMBOLIZER
+extern bool
+ASAN_USE_EXTERNAL_SYMBOLIZER(const void *pc, char *out, int out_size);
+#endif
+
+namespace __asan {
+
+// ----------------------- AsanStackTrace ----------------------------- {{{1
+#if defined(ASAN_USE_EXTERNAL_SYMBOLIZER)
+void AsanStackTrace::PrintStack(uintptr_t *addr, size_t size) {
+  for (size_t i = 0; i < size && addr[i]; i++) {  // a zero pc ends the trace.
+    char buff[4096];
+    buff[0] = 0;  // stay printable even if the symbolizer writes nothing.
+    // The symbolizer's bool result is deliberately not used to pick the text.
+    ASAN_USE_EXTERNAL_SYMBOLIZER((void*)addr[i], buff, sizeof(buff));
+    Printf("  #%zu 0x%zx %s\n", i, addr[i], buff);
+  }
+}
+
+#else  // ASAN_USE_EXTERNAL_SYMBOLIZER
+void AsanStackTrace::PrintStack(uintptr_t *addr, size_t size) {
+  AsanProcMaps maps;
+  // One line per frame; a zero pc ends the trace early.
+  for (size_t frame = 0; frame < size && addr[frame]; frame++) {
+    const uintptr_t pc = addr[frame];
+    uintptr_t off;
+    char name[4096];
+    maps.Reset();
+    if (!maps.GetObjectNameAndOffset(pc, &off, name, sizeof(name))) {
+      Printf("    #%zu 0x%zx\n", frame, pc);
+      continue;
+    }
+    Printf("    #%zu 0x%zx (%s+0x%zx)\n", frame, pc, name, off);
+  }
+}
+#endif  // ASAN_USE_EXTERNAL_SYMBOLIZER
+
+uintptr_t AsanStackTrace::GetCurrentPc() {
+  return GET_CALLER_PC();  // NOTE(review): presumably a pc in our caller.
+}
+
+void AsanStackTrace::FastUnwindStack(uintptr_t pc, uintptr_t bp) {
+  CHECK(size == 0 && trace[0] == pc);  // caller has already stored pc.
+  size = 1;
+  if (!asan_inited) return;  // before init, only pc is recorded.
+  AsanThread *t = asanThreadRegistry().GetCurrent();
+  if (!t) return;  // stack bounds unknown: keep just pc.
+  uintptr_t *frame = (uintptr_t*)bp;
+  uintptr_t *prev_frame = frame;
+  uintptr_t *top = (uintptr_t*)t->stack_top();
+  uintptr_t *bottom = (uintptr_t*)t->stack_bottom();
+  while (frame >= prev_frame &&  // the chain must not go back down,
+         frame < top - 2 &&      // must stay inside the thread's stack,
+         frame > bottom &&
+         size < max_size) {      // and the trace must have room left.
+    uintptr_t pc1 = frame[1];  // presumably the saved return address.
+    if (pc1 != pc) {  // entries equal to the starting pc are skipped.
+      trace[size++] = pc1;
+    }
+    prev_frame = frame;
+    frame = (uintptr_t*)frame[0];  // follow the link to the next frame.
+  }
+}
+
+// On 32-bits we don't compress stack traces.
+// On 64-bits we compress stack traces: if a given pc differs slightly from
+// the previous one, we record a 31-bit offset instead of the full pc.
+size_t AsanStackTrace::CompressStack(AsanStackTrace *stack,
+                                   uint32_t *compressed, size_t size) {
+#if __WORDSIZE == 32
+  // Don't compress, just copy.
+  size_t res = 0;
+  for (size_t i = 0; i < stack->size && i < size; i++) {
+    compressed[i] = stack->trace[i];
+    res++;
+  }
+  if (stack->size < size)
+    compressed[stack->size] = 0;  // zero-terminate when there is room.
+#else  // 64 bits, compress.
+  uintptr_t prev_pc = 0;
+  const uintptr_t kMaxOffset = (1ULL << 30) - 1;
+  uintptr_t c_index = 0;  // next free slot in |compressed|.
+  size_t res = 0;  // number of frames stored so far.
+  for (size_t i = 0, n = stack->size; i < n; i++) {
+    uintptr_t pc = stack->trace[i];
+    if (!pc) break;
+    if ((int64_t)pc < 0) break;  // top bit set: cannot be encoded.
+    // Printf("C pc[%zu] %zx\n", i, pc);
+    if (prev_pc - pc < kMaxOffset || pc - prev_pc < kMaxOffset) {
+      uintptr_t offset = (int64_t)(pc - prev_pc);
+      offset |= (1U << 31);  // bit 31 tags the word as an offset.
+      if (c_index >= size) break;
+      // Printf("C co[%zu] offset %zx\n", i, offset);
+      compressed[c_index++] = offset;
+    } else {
+      uintptr_t hi = pc >> 32;
+      uintptr_t lo = (pc << 32) >> 32;
+      CHECK((hi & (1 << 31)) == 0);
+      if (c_index + 1 >= size) break;  // a full pc needs two words.
+      // Printf("C co[%zu] hi/lo: %zx %zx\n", c_index, hi, lo);
+      compressed[c_index++] = hi;
+      compressed[c_index++] = lo;
+    }
+    res++;
+    prev_pc = pc;
+  }
+  if (c_index < size)
+    compressed[c_index] = 0;
+  if (c_index + 1 < size)
+    compressed[c_index + 1] = 0;
+#endif  // __WORDSIZE
+
+  // debug-only code
+#if 0
+  AsanStackTrace check_stack;
+  UncompressStack(&check_stack, compressed, size);
+  if (res < check_stack.size) {
+    Printf("res %zu check_stack.size %zu; c_size %zu\n", res,
+           check_stack.size, size);
+  }
+  // |res| may be greater than check_stack.size, because
+  // UncompressStack(CompressStack(stack)) eliminates the 0x0 frames.
+  CHECK(res >= check_stack.size);
+  CHECK(0 == REAL(memcmp)(check_stack.trace, stack->trace,
+                          check_stack.size * sizeof(uintptr_t)));
+#endif
+
+  return res;  // number of frames written to |compressed|.
+}
+
+void AsanStackTrace::UncompressStack(AsanStackTrace *stack,
+                                     uint32_t *compressed, size_t size) {
+#if __WORDSIZE == 32
+  // Don't uncompress, just copy until a zero word or the trace is full.
+  stack->size = 0;
+  for (size_t i = 0; i < size && i < kStackTraceMax; i++) {
+    if (!compressed[i]) break;
+    stack->size++;
+    stack->trace[i] = compressed[i];
+  }
+#else  // 64 bits, uncompress
+  uintptr_t prev_pc = 0;
+  stack->size = 0;
+  for (size_t i = 0; i < size && stack->size < kStackTraceMax; i++) {
+    uint32_t x = compressed[i];
+    uintptr_t pc = 0;
+    if (x & (1U << 31)) {
+      // Printf("U co[%zu] offset: %x\n", i, x);
+      // Tagged word: bits 0..30 hold a signed offset from the previous pc.
+      // Drop the tag with an unsigned shift (no UB), then sign-extend.
+      int32_t offset = (int32_t)(x << 1) >> 1;
+      pc = prev_pc + offset;
+      CHECK(pc);
+    } else {
+      // A full pc takes two words (hi, lo); stop on a truncated pair.
+      if (i + 1 >= size) break;
+      uintptr_t hi = x;
+      uintptr_t lo = compressed[i+1];
+      // Printf("U co[%zu] hi/lo: %zx %zx\n", i, hi, lo);
+      i++;
+      pc = (hi << 32) | lo;
+      if (!pc) break;  // an all-zero pair terminates the trace.
+    }
+    // Printf("U pc[%zu] %zx\n", stack->size, pc);
+    stack->trace[stack->size++] = pc;
+    prev_pc = pc;
+  }
+#endif  // __WORDSIZE
+}
+
+}  // namespace __asan
diff --git a/lib/asan/asan_stack.h b/lib/asan/asan_stack.h
new file mode 100644
index 0000000..252df93
--- /dev/null
+++ b/lib/asan/asan_stack.h
@@ -0,0 +1,104 @@
+//===-- asan_stack.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan-private header for asan_stack.cc.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_STACK_H
+#define ASAN_STACK_H
+
+#include "asan_internal.h"
+
+namespace __asan {
+
+static const size_t kStackTraceMax = 64;
+
+struct AsanStackTrace {
+  size_t size;      // number of entries of |trace| in use.
+  size_t max_size;  // FastUnwindStack stops once |size| reaches this.
+  uintptr_t trace[kStackTraceMax];
+  static void PrintStack(uintptr_t *addr, size_t size);
+  void PrintStack() {
+    PrintStack(trace, size);
+  }
+  // Copies the trace into |dst| and zero-fills whatever room is left.
+  void CopyTo(uintptr_t *dst, size_t dst_size) {
+    for (size_t i = 0; i < dst_size; i++) {
+      dst[i] = (i < size) ? trace[i] : 0;
+    }
+  }
+
+  // Loads at most kStackTraceMax entries from |src|.
+  void CopyFrom(uintptr_t *src, size_t src_size) {
+    size = (src_size > kStackTraceMax) ? kStackTraceMax : src_size;
+    for (size_t i = 0; i != size; i++) {
+      trace[i] = src[i];
+    }
+  }
+
+  void GetStackTrace(size_t max_s, uintptr_t pc, uintptr_t bp);
+
+  void FastUnwindStack(uintptr_t pc, uintptr_t bp);
+
+  static uintptr_t GetCurrentPc();
+
+  static size_t CompressStack(AsanStackTrace *stack,
+                            uint32_t *compressed, size_t size);
+  static void UncompressStack(AsanStackTrace *stack,
+                              uint32_t *compressed, size_t size);
+};
+
+}  // namespace __asan
+
+// Use this macro if you want to print stack trace with the caller
+// of the current function in the top frame.
+#define GET_CALLER_PC_BP_SP \
+  uintptr_t bp = GET_CURRENT_FRAME();              \
+  uintptr_t pc = GET_CALLER_PC();                  \
+  uintptr_t local_stack;                           \
+  uintptr_t sp = (uintptr_t)&local_stack;
+
+// Use this macro if you want to print stack trace with the current
+// function in the top frame.
+#define GET_CURRENT_PC_BP_SP \
+  uintptr_t bp = GET_CURRENT_FRAME();              \
+  uintptr_t pc = AsanStackTrace::GetCurrentPc();   \
+  uintptr_t local_stack;                           \
+  uintptr_t sp = (uintptr_t)&local_stack;
+
+// Get the stack trace with the given pc and bp.
+// The pc will be in the position 0 of the resulting stack trace.
+// The bp may refer to the current frame or to the caller's frame.
+// fast_unwind is currently unused.
+#define GET_STACK_TRACE_WITH_PC_AND_BP(max_s, pc, bp)               \
+  AsanStackTrace stack;                                             \
+  stack.GetStackTrace(max_s, pc, bp);                               \
+
+// NOTE: A Rule of thumb is to retrieve stack trace in the interceptors
+// as early as possible (in functions exposed to the user), as we generally
+// don't want stack trace to contain functions from ASan internals.
+
+#define GET_STACK_TRACE_HERE(max_size)                        \
+  GET_STACK_TRACE_WITH_PC_AND_BP(max_size,                    \
+      AsanStackTrace::GetCurrentPc(), GET_CURRENT_FRAME())    \
+
+#define GET_STACK_TRACE_HERE_FOR_MALLOC                             \
+  GET_STACK_TRACE_HERE(FLAG_malloc_context_size)
+
+#define GET_STACK_TRACE_HERE_FOR_FREE(ptr)                          \
+  GET_STACK_TRACE_HERE(FLAG_malloc_context_size)
+
+#define PRINT_CURRENT_STACK()                    \
+  {                                              \
+    GET_STACK_TRACE_HERE(kStackTraceMax);        \
+    stack.PrintStack();                          \
+  }                                              \
+
+#endif  // ASAN_STACK_H
diff --git a/lib/asan/asan_stats.cc b/lib/asan/asan_stats.cc
new file mode 100644
index 0000000..9dbd07d
--- /dev/null
+++ b/lib/asan/asan_stats.cc
@@ -0,0 +1,88 @@
+//===-- asan_stats.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Code related to statistics collected by AddressSanitizer.
+//===----------------------------------------------------------------------===//
+#include "asan_interceptors.h"
+#include "asan_interface.h"
+#include "asan_internal.h"
+#include "asan_lock.h"
+#include "asan_stats.h"
+#include "asan_thread_registry.h"
+
+namespace __asan {
+
+AsanStats::AsanStats() {
+  CHECK(REAL(memset) != NULL);
+  REAL(memset)(this, 0, sizeof(AsanStats));
+}
+
+static void PrintMallocStatsArray(const char *prefix,
+                                  size_t (&array)[kNumberOfSizeClasses]) {
+  Printf("%s", prefix);
+  for (size_t cls = 0; cls < kNumberOfSizeClasses; cls++) {
+    // Size classes whose counter is zero are left out of the listing.
+    if (array[cls]) Printf("%zu:%zu; ", cls, array[cls]);
+  }
+  Printf("\n");
+}
+
+void AsanStats::Print() {
+  Printf("Stats: %zuM malloced (%zuM for red zones) by %zu calls\n",
+         malloced>>20, malloced_redzones>>20, mallocs);
+  Printf("Stats: %zuM realloced by %zu calls\n", realloced>>20, reallocs);
+  Printf("Stats: %zuM freed by %zu calls\n", freed>>20, frees);
+  Printf("Stats: %zuM really freed by %zu calls\n",
+         really_freed>>20, real_frees);
+  Printf("Stats: %zuM (%zu full pages) mmaped in %zu calls\n",
+         mmaped>>20, mmaped / kPageSize, mmaps);
+
+  PrintMallocStatsArray("  mmaps   by size class: ", mmaped_by_size);
+  PrintMallocStatsArray("  mallocs by size class: ", malloced_by_size);
+  PrintMallocStatsArray("  frees   by size class: ", freed_by_size);
+  PrintMallocStatsArray("  rfrees  by size class: ", really_freed_by_size);
+  Printf("Stats: malloc large: %zu small slow: %zu\n",
+         malloc_large, malloc_small_slow);
+}
+
+static AsanLock print_lock(LINKER_INITIALIZED);
+
+static void PrintAccumulatedStats() {
+  AsanStats stats = asanThreadRegistry().GetAccumulatedStats();
+  // Use lock to keep reports from mixing up.
+  ScopedLock lock(&print_lock);
+  stats.Print();
+}
+
+}  // namespace __asan
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+size_t __asan_get_current_allocated_bytes() {
+  return asanThreadRegistry().GetCurrentAllocatedBytes();
+}
+
+size_t __asan_get_heap_size() {
+  return asanThreadRegistry().GetHeapSize();
+}
+
+size_t __asan_get_free_bytes() {
+  return asanThreadRegistry().GetFreeBytes();
+}
+
+size_t __asan_get_unmapped_bytes() {
+  return 0;
+}
+
+void __asan_print_accumulated_stats() {
+  PrintAccumulatedStats();
+}
diff --git a/lib/asan/asan_stats.h b/lib/asan/asan_stats.h
new file mode 100644
index 0000000..d6dd084
--- /dev/null
+++ b/lib/asan/asan_stats.h
@@ -0,0 +1,59 @@
+//===-- asan_stats.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan-private header for statistics.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_STATS_H
+#define ASAN_STATS_H
+
+#include "asan_allocator.h"
+#include "asan_internal.h"
+
+namespace __asan {
+
+// AsanStats struct is NOT thread-safe.
+// Each AsanThread has its own AsanStats, which are sometimes flushed
+// to the accumulated AsanStats.
+struct AsanStats {
+  // AsanStats must be a struct consisting of size_t fields only.
+  // When merging two AsanStats structs, we treat them as arrays of size_t.
+  size_t mallocs;
+  size_t malloced;
+  size_t malloced_redzones;
+  size_t frees;
+  size_t freed;
+  size_t real_frees;
+  size_t really_freed;
+  size_t really_freed_redzones;
+  size_t reallocs;
+  size_t realloced;
+  size_t mmaps;
+  size_t mmaped;
+  size_t mmaped_by_size[kNumberOfSizeClasses];
+  size_t malloced_by_size[kNumberOfSizeClasses];
+  size_t freed_by_size[kNumberOfSizeClasses];
+  size_t really_freed_by_size[kNumberOfSizeClasses];
+
+  size_t malloc_large;
+  size_t malloc_small_slow;
+
+  // Ctor for global AsanStats (accumulated stats and main thread stats).
+  explicit AsanStats(LinkerInitialized) { }
+  // Default ctor for thread-local stats.
+  AsanStats();
+
+  // Prints formatted stats to stderr.
+  void Print();
+};
+
+}  // namespace __asan
+
+#endif  // ASAN_STATS_H
diff --git a/lib/asan/asan_thread.cc b/lib/asan/asan_thread.cc
new file mode 100644
index 0000000..315db7e
--- /dev/null
+++ b/lib/asan/asan_thread.cc
@@ -0,0 +1,132 @@
+//===-- asan_thread.cc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Thread-related code.
+//===----------------------------------------------------------------------===//
+#include "asan_allocator.h"
+#include "asan_interceptors.h"
+#include "asan_procmaps.h"
+#include "asan_stack.h"
+#include "asan_thread.h"
+#include "asan_thread_registry.h"
+#include "asan_mapping.h"
+
+namespace __asan {
+
+AsanThread::AsanThread(LinkerInitialized x)
+    : fake_stack_(x),
+      malloc_storage_(x),
+      stats_(x) { }
+
+AsanThread *AsanThread::Create(int parent_tid, thread_callback_t start_routine,
+                               void *arg, AsanStackTrace *stack) {
+  size_t size = RoundUpTo(sizeof(AsanThread), kPageSize);  // as in Destroy().
+  AsanThread *thread = (AsanThread*)AsanMmapSomewhereOrDie(size, __FUNCTION__);
+  thread->start_routine_ = start_routine;  // NOTE(review): no ctor is run;
+  thread->arg_ = arg;  // other fields presumably rely on zeroed pages.
+
+  AsanThreadSummary *summary = new AsanThreadSummary(parent_tid, stack);
+  summary->set_thread(thread);  // link summary and thread both ways.
+  thread->set_summary(summary);
+
+  return thread;
+}
+
+void AsanThreadSummary::TSDDtor(void *tsd) {
+  // |tsd| is the summary that SetCurrent() stored in the thread's TSD.
+  AsanThreadSummary *self = static_cast<AsanThreadSummary*>(tsd);
+  if (FLAG_v >= 1) Report("T%d TSDDtor\n", self->tid());
+  // Tear down the thread object unless it has already been detached.
+  AsanThread *owner = self->thread();
+  if (!owner) return;
+  owner->Destroy();
+}
+
+void AsanThread::Destroy() {
+  if (FLAG_v >= 1) {
+    Report("T%d exited\n", tid());
+  }
+
+  asanThreadRegistry().UnregisterThread(this);
+  CHECK(summary()->thread() == NULL);  // the summary no longer points at us.
+  // We also clear the shadow on thread destruction because
+  // some code may still be executing in later TSD destructors
+  // and we don't want it to have any poisoned stack.
+  ClearShadowForThreadStack();
+  fake_stack().Cleanup();
+  size_t size = RoundUpTo(sizeof(AsanThread), kPageSize);  // as in Create().
+  AsanUnmapOrDie(this, size);  // |this| must not be touched after this call.
+}
+
+void AsanThread::Init() {
+  SetThreadStackTopAndBottom();
+  CHECK(AddrIsInMem(stack_bottom_));
+  CHECK(AddrIsInMem(stack_top_));
+  ClearShadowForThreadStack();
+  if (FLAG_v >= 1) {
+    int local = 0;
+    Report("T%d: stack [%p,%p) size 0x%zx; local=%p\n",
+           tid(), stack_bottom_, stack_top_,
+           stack_top_ - stack_bottom_, &local);
+  }
+  fake_stack_.Init(stack_size());
+}
+
+thread_return_t AsanThread::ThreadStart() {
+  Init();
+  if (FLAG_use_sigaltstack) SetAlternateSignalStack();
+
+  if (!start_routine_) {
+    // start_routine_ == NULL if we're on the main thread or on one of the
+    // OS X libdispatch worker threads. But nobody is supposed to call
+    // ThreadStart() for the worker threads.
+    CHECK(tid() == 0);
+    return 0;
+  }
+
+  thread_return_t res = start_routine_(arg_);  // run the user's routine.
+  malloc_storage().CommitBack();
+  if (FLAG_use_sigaltstack) UnsetAlternateSignalStack();
+
+  this->Destroy();  // unmaps |this|: no member access past this line.
+
+  return res;
+}
+
+void AsanThread::ClearShadowForThreadStack() {
+  PoisonShadow(stack_bottom_, stack_top_ - stack_bottom_, 0);
+}
+
+const char *AsanThread::GetFrameNameByAddr(uintptr_t addr, uintptr_t *offset) {
+  // Pick the region to scan: the real stack, or else the fake stack.
+  const bool on_real_stack = AddrIsInStack(addr);
+  uintptr_t lowest = 0;
+  if (on_real_stack) {
+    lowest = stack_bottom();
+  } else {
+    lowest = fake_stack().AddrIsInFakeStack(addr);
+    CHECK(lowest);
+  }
+  // Walk down word by word from the aligned address until a magic is found.
+  for (uintptr_t *word = (uintptr_t*)(addr & ~(__WORDSIZE/8 - 1));
+       word >= (uintptr_t*)lowest; word--) {
+    const bool live = word[0] == kCurrentStackFrameMagic;
+    const bool retired = !on_real_stack && word[0] == kRetiredStackFrameMagic;
+    if (live || retired) {
+      *offset = addr - (uintptr_t)word;
+      return (const char*)word[1];
+    }
+  }
+  *offset = 0;
+  return "UNKNOWN";
+}
+
+}  // namespace __asan
diff --git a/lib/asan/asan_thread.h b/lib/asan/asan_thread.h
new file mode 100644
index 0000000..09607d9
--- /dev/null
+++ b/lib/asan/asan_thread.h
@@ -0,0 +1,111 @@
+//===-- asan_thread.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan-private header for asan_thread.cc.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_THREAD_H
+#define ASAN_THREAD_H
+
+#include "asan_allocator.h"
+#include "asan_internal.h"
+#include "asan_stack.h"
+#include "asan_stats.h"
+
+namespace __asan {
+
+const size_t kMaxThreadStackSize = 16 * (1 << 20);  // 16M
+
+class AsanThread;
+
+// These objects are created for every thread and are never deleted,
+// so we can find them by tid even if the thread is long dead.
+class AsanThreadSummary {
+ public:
+  explicit AsanThreadSummary(LinkerInitialized) { }  // for T0.
+  AsanThreadSummary(int parent_tid, AsanStackTrace *stack)
+      : tid_(-1), parent_tid_(parent_tid), announced_(false), thread_(0) {
+    // No creation stack: keep the trace empty so Announce() prints no frames.
+    if (stack) {
+      stack_ = *stack;
+    } else {
+      stack_.size = 0;
+    }
+  }
+  void Announce() {
+    if (tid_ == 0) return;  // no need to announce the main thread.
+    if (!announced_) {  // each thread is announced at most once.
+      announced_ = true;
+      Printf("Thread T%d created by T%d here:\n", tid_, parent_tid_);
+      stack_.PrintStack();
+    }
+  }
+  int tid() { return tid_; }
+  void set_tid(int tid) { tid_ = tid; }
+  AsanThread *thread() { return thread_; }
+  void set_thread(AsanThread *thread) { thread_ = thread; }
+  static void TSDDtor(void *tsd);
+
+ private:
+  int tid_;
+  int parent_tid_;
+  bool announced_;
+  AsanStackTrace stack_;  // creation stack printed by Announce().
+  AsanThread *thread_;
+};
+
+// AsanThread are stored in TSD and destroyed when the thread dies.
+class AsanThread {
+ public:
+  explicit AsanThread(LinkerInitialized);  // for T0.
+  static AsanThread *Create(int parent_tid, thread_callback_t start_routine,
+                            void *arg, AsanStackTrace *stack);
+  void Destroy();
+
+  void Init();  // Should be called from the thread itself.
+  thread_return_t ThreadStart();
+
+  uintptr_t stack_top() { return stack_top_; }
+  uintptr_t stack_bottom() { return stack_bottom_; }
+  size_t stack_size() { return stack_top_ - stack_bottom_; }
+  int tid() { return summary_->tid(); }
+  AsanThreadSummary *summary() { return summary_; }
+  void set_summary(AsanThreadSummary *summary) { summary_ = summary; }
+
+  const char *GetFrameNameByAddr(uintptr_t addr, uintptr_t *offset);
+
+  bool AddrIsInStack(uintptr_t addr) {
+    return addr >= stack_bottom_ && addr < stack_top_;
+  }
+
+  FakeStack &fake_stack() { return fake_stack_; }
+  AsanThreadLocalMallocStorage &malloc_storage() { return malloc_storage_; }
+  AsanStats &stats() { return stats_; }
+
+  static const int kInvalidTid = -1;
+
+ private:
+
+  void SetThreadStackTopAndBottom();
+  void ClearShadowForThreadStack();
+  AsanThreadSummary *summary_;
+  thread_callback_t start_routine_;
+  void *arg_;
+  uintptr_t  stack_top_;
+  uintptr_t  stack_bottom_;
+
+  FakeStack fake_stack_;
+  AsanThreadLocalMallocStorage malloc_storage_;
+  AsanStats stats_;
+};
+
+}  // namespace __asan
+
+#endif  // ASAN_THREAD_H
diff --git a/lib/asan/asan_thread_registry.cc b/lib/asan/asan_thread_registry.cc
new file mode 100644
index 0000000..09f90fa
--- /dev/null
+++ b/lib/asan/asan_thread_registry.cc
@@ -0,0 +1,174 @@
+//===-- asan_thread_registry.cc ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// AsanThreadRegistry-related code. AsanThreadRegistry is a container
+// for summaries of all created threads.
+//===----------------------------------------------------------------------===//
+
+#include "asan_stack.h"
+#include "asan_thread.h"
+#include "asan_thread_registry.h"
+
+namespace __asan {
+
+static AsanThreadRegistry asan_thread_registry(__asan::LINKER_INITIALIZED);
+
+AsanThreadRegistry &asanThreadRegistry() {
+  return asan_thread_registry;
+}
+
+AsanThreadRegistry::AsanThreadRegistry(LinkerInitialized x)
+    : main_thread_(x),
+      main_thread_summary_(x),
+      accumulated_stats_(x),
+      mu_(x) { }
+
+void AsanThreadRegistry::Init() {
+  AsanTSDInit(AsanThreadSummary::TSDDtor);
+  main_thread_.set_summary(&main_thread_summary_);  // link T0 both ways.
+  main_thread_summary_.set_thread(&main_thread_);
+  RegisterThread(&main_thread_);  // expected to receive tid 0.
+  SetCurrent(&main_thread_);  // stores the T0 summary in this thread's TSD.
+  // At this point only one thread exists.
+  inited_ = true;
+}
+
+void AsanThreadRegistry::RegisterThread(AsanThread *thread) {
+  ScopedLock lock(&mu_);
+  // Tids are handed out sequentially, starting from the current count.
+  const int new_tid = n_threads_++;
+  CHECK(n_threads_ < kMaxNumberOfThreads);
+
+  AsanThreadSummary *const info = thread->summary();
+  CHECK(info != NULL);
+  info->set_tid(new_tid);
+  thread_summaries_[new_tid] = info;
+}
+
+void AsanThreadRegistry::UnregisterThread(AsanThread *thread) {
+  ScopedLock lock(&mu_);
+  FlushToAccumulatedStatsUnlocked(&thread->stats());
+  AsanThreadSummary *summary = thread->summary();
+  CHECK(summary);
+  summary->set_thread(NULL);
+}
+
+AsanThread *AsanThreadRegistry::GetMain() {
+  return &main_thread_;
+}
+
+AsanThread *AsanThreadRegistry::GetCurrent() {
+  AsanThreadSummary *summary = (AsanThreadSummary *)AsanTSDGet();
+  if (!summary) {
+#ifdef ANDROID
+    // On Android, libc constructor is called _after_ asan_init, and cleans up
+    // TSD. Try to figure out if this is still the main thread by the stack
+    // address. We are not entirely sure that we have correct main thread
+    // limits, so only do this magic on Android, and only if the found thread is
+    // the main thread.
+    AsanThread* thread = FindThreadByStackAddress((uintptr_t)&summary);
+    if (thread && thread->tid() == 0) {
+      SetCurrent(thread);
+      return thread;
+    }
+#endif
+    return 0;
+  }
+  return summary->thread();
+}
+
+void AsanThreadRegistry::SetCurrent(AsanThread *t) {
+  CHECK(t->summary());
+  if (FLAG_v >= 2) {
+    Report("SetCurrent: %p for thread %p\n", t->summary(), GetThreadSelf());
+  }
+  // Make sure we do not reset the current AsanThread.
+  CHECK(AsanTSDGet() == 0);
+  AsanTSDSet(t->summary());
+  CHECK(AsanTSDGet() == t->summary());
+}
+
+AsanStats &AsanThreadRegistry::GetCurrentThreadStats() {
+  AsanThread *t = GetCurrent();
+  return (t) ? t->stats() : main_thread_.stats();
+}
+
+AsanStats AsanThreadRegistry::GetAccumulatedStats() {
+  ScopedLock lock(&mu_);
+  UpdateAccumulatedStatsUnlocked();
+  return accumulated_stats_;
+}
+
+size_t AsanThreadRegistry::GetCurrentAllocatedBytes() {
+  ScopedLock lock(&mu_);
+  UpdateAccumulatedStatsUnlocked();
+  return accumulated_stats_.malloced - accumulated_stats_.freed;
+}
+
+size_t AsanThreadRegistry::GetHeapSize() {
+  ScopedLock lock(&mu_);
+  UpdateAccumulatedStatsUnlocked();
+  return accumulated_stats_.mmaped;
+}
+
+size_t AsanThreadRegistry::GetFreeBytes() {
+  ScopedLock lock(&mu_);
+  UpdateAccumulatedStatsUnlocked();
+  const AsanStats &total = accumulated_stats_;
+  // Mapped bytes minus what was handed out, plus what was really freed.
+  const size_t handed_out = total.malloced + total.malloced_redzones;
+  const size_t returned = total.really_freed + total.really_freed_redzones;
+  return total.mmaped - handed_out + returned;
+}
+
+AsanThreadSummary *AsanThreadRegistry::FindByTid(int tid) {
+  CHECK(tid >= 0);
+  CHECK(tid < n_threads_);
+  CHECK(thread_summaries_[tid]);
+  return thread_summaries_[tid];
+}
+
+AsanThread *AsanThreadRegistry::FindThreadByStackAddress(uintptr_t addr) {
+  ScopedLock lock(&mu_);
+  // Newest threads are checked first: the main thread's (tid 0) stack limits
+  // are only guessed, while the others come from libpthread and are exact.
+  for (int tid = n_threads_ - 1; tid >= 0; --tid) {
+    AsanThread *candidate = thread_summaries_[tid]->thread();
+    if (candidate && candidate->fake_stack().StackSize() &&
+        (candidate->fake_stack().AddrIsInFakeStack(addr) ||
+         candidate->AddrIsInStack(addr))) {
+      return candidate;
+    }
+  }
+  return 0;
+}
+
+void AsanThreadRegistry::UpdateAccumulatedStatsUnlocked() {
+  for (int tid = 0; tid < n_threads_; tid++) {
+    AsanThread *t = thread_summaries_[tid]->thread();
+    if (t != NULL) {
+      FlushToAccumulatedStatsUnlocked(&t->stats());
+    }
+  }
+}
+
+void AsanThreadRegistry::FlushToAccumulatedStatsUnlocked(AsanStats *stats) {
+  // AsanStats holds size_t fields only, so both structs are walked as arrays.
+  size_t *total = reinterpret_cast<size_t*>(&accumulated_stats_);
+  size_t *local = reinterpret_cast<size_t*>(stats);
+  size_t *const local_end = local + sizeof(AsanStats) / sizeof(size_t);
+  while (local != local_end) {
+    *total++ += *local;
+    *local++ = 0;
+  }
+}
+
+}  // namespace __asan
diff --git a/lib/asan/asan_thread_registry.h b/lib/asan/asan_thread_registry.h
new file mode 100644
index 0000000..491101e
--- /dev/null
+++ b/lib/asan/asan_thread_registry.h
@@ -0,0 +1,81 @@
+//===-- asan_thread_registry.h ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan-private header for asan_thread_registry.cc
+//===----------------------------------------------------------------------===//
+
+#ifndef ASAN_THREAD_REGISTRY_H
+#define ASAN_THREAD_REGISTRY_H
+
+#include "asan_lock.h"
+#include "asan_stack.h"
+#include "asan_stats.h"
+#include "asan_thread.h"
+
+namespace __asan {
+
+// Stores summaries of all created threads, returns current thread,
+// thread by tid, thread by stack address. There is a single instance
+// of AsanThreadRegistry for the whole program.
+// AsanThreadRegistry is thread-safe.
+class AsanThreadRegistry {
+ public:
+  explicit AsanThreadRegistry(LinkerInitialized);
+  void Init();
+  void RegisterThread(AsanThread *thread);
+  void UnregisterThread(AsanThread *thread);
+
+  AsanThread *GetMain();
+  // Get the current thread. May return NULL.
+  AsanThread *GetCurrent();
+  void SetCurrent(AsanThread *t);
+
+  int GetCurrentTidOrMinusOne() {
+    if (!inited_) return 0;
+    AsanThread *t = GetCurrent();
+    return t ? t->tid() : -1;
+  }
+
+  // Returns stats for GetCurrent(), or stats for
+  // T0 if GetCurrent() returns NULL.
+  AsanStats &GetCurrentThreadStats();
+  // Flushes all thread-local stats to accumulated stats, and returns
+  // a copy of accumulated stats.
+  AsanStats GetAccumulatedStats();
+  size_t GetCurrentAllocatedBytes();
+  size_t GetHeapSize();
+  size_t GetFreeBytes();
+
+  AsanThreadSummary *FindByTid(int tid);
+  AsanThread *FindThreadByStackAddress(uintptr_t addr);
+
+ private:
+  void UpdateAccumulatedStatsUnlocked();
+  // Adds values of all counters in "stats" to accumulated stats,
+  // and fills "stats" with zeroes.
+  void FlushToAccumulatedStatsUnlocked(AsanStats *stats);
+
+  static const int kMaxNumberOfThreads = (1 << 22);  // 4M
+  AsanThreadSummary *thread_summaries_[kMaxNumberOfThreads];
+  AsanThread main_thread_;
+  AsanThreadSummary main_thread_summary_;
+  AsanStats accumulated_stats_;
+  int n_threads_;
+  AsanLock mu_;
+  bool inited_;
+};
+
+// Returns a single instance of registry.
+AsanThreadRegistry &asanThreadRegistry();
+
+}  // namespace __asan
+
+#endif  // ASAN_THREAD_REGISTRY_H
diff --git a/lib/asan/asan_win.cc b/lib/asan/asan_win.cc
new file mode 100644
index 0000000..523f90d
--- /dev/null
+++ b/lib/asan/asan_win.cc
@@ -0,0 +1,316 @@
+//===-- asan_win.cc -------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Windows-specific details.
+//===----------------------------------------------------------------------===//
+#ifdef _WIN32
+#include <windows.h>
+
+#include <dbghelp.h>
+#include <stdlib.h>
+
+#include <new>  // FIXME: temporarily needed for placement new in AsanLock.
+
+#include "asan_interceptors.h"
+#include "asan_internal.h"
+#include "asan_lock.h"
+#include "asan_procmaps.h"
+#include "asan_thread.h"
+
+// Should not add dependency on libstdc++,
+// since most of the stuff here is inlinable.
+#include <algorithm>
+
+namespace __asan {
+
+// ---------------------- Memory management ---------------- {{{1
+void *AsanMmapFixedNoReserve(uintptr_t fixed_addr, size_t size) {
+  return VirtualAlloc((LPVOID)fixed_addr, size,
+                      MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+}
+
+void *AsanMmapSomewhereOrDie(size_t size, const char *mem_type) {
+  void *rv = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  if (rv == NULL)
+    OutOfMemoryMessageAndDie(mem_type, size);
+  return rv;
+}
+
+void *AsanMprotect(uintptr_t fixed_addr, size_t size) {
+  return VirtualAlloc((LPVOID)fixed_addr, size,
+                      MEM_RESERVE | MEM_COMMIT, PAGE_NOACCESS);
+}
+
+void AsanUnmapOrDie(void *addr, size_t size) {
+  CHECK(VirtualFree(addr, size, MEM_DECOMMIT));
+}
+
+// ---------------------- IO ---------------- {{{1
+size_t AsanWrite(int fd, const void *buf, size_t count) {
+  if (fd != 2)
+    UNIMPLEMENTED();
+
+  HANDLE err = GetStdHandle(STD_ERROR_HANDLE);
+  if (err == NULL)
+    return 0;  // FIXME: this might not work on some apps.
+  DWORD ret;
+  if (!WriteFile(err, buf, count, &ret, NULL))
+    return 0;
+  return ret;
+}
+
+// FIXME: Looks like these functions are not needed and are linked in by the
+// code unreachable on Windows. We should clean this up.
+int AsanOpenReadonly(const char* filename) {
+  UNIMPLEMENTED();
+}
+
+size_t AsanRead(int fd, void *buf, size_t count) {
+  UNIMPLEMENTED();
+}
+
+int AsanClose(int fd) {
+  UNIMPLEMENTED();
+}
+
+// ---------------------- Stacktraces, symbols, etc. ---------------- {{{1
+static AsanLock dbghelp_lock(LINKER_INITIALIZED);
+static bool dbghelp_initialized = false;
+#pragma comment(lib, "dbghelp.lib")
+
+void AsanThread::SetThreadStackTopAndBottom() {
+  MEMORY_BASIC_INFORMATION mbi;
+  CHECK(VirtualQuery(&mbi /* on stack */,
+                    &mbi, sizeof(mbi)) != 0);
+  // FIXME: is it possible for the stack to not be a single allocation?
+  // Are these values what ASan expects to get (reserved, not committed;
+  // including stack guard page) ?
+  stack_top_ = (uintptr_t)mbi.BaseAddress + mbi.RegionSize;
+  stack_bottom_ = (uintptr_t)mbi.AllocationBase;
+}
+
+void AsanStackTrace::GetStackTrace(size_t max_s, uintptr_t pc, uintptr_t bp) {
+  max_size = max_s;
+  void *tmp[kStackTraceMax];
+
+  // FIXME: CaptureStackBackTrace might be too slow for us.
+  // FIXME: Compare with StackWalk64.
+  // FIXME: Look at LLVMUnhandledExceptionFilter in Signals.inc
+  size_t cs_ret = CaptureStackBackTrace(1, max_size, tmp, NULL),
+         offset = 0;
+  // Skip the RTL frames by searching for the PC in the stacktrace.
+  // FIXME: this doesn't work well for the malloc/free stacks yet.
+  for (size_t i = 0; i < cs_ret; i++) {
+    if (pc != (uintptr_t)tmp[i])
+      continue;
+    offset = i;
+    break;
+  }
+
+  size = cs_ret - offset;
+  for (size_t i = 0; i < size; i++)
+    trace[i] = (uintptr_t)tmp[i + offset];
+}
+
+// Symbolizes 'addr' into 'out_buffer'; returns false if SymFromAddr() fails.
+bool WinSymbolize(const void *addr, char *out_buffer, int buffer_size) {
+  ScopedLock lock(&dbghelp_lock);
+  if (!dbghelp_initialized) {
+    SymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_UNDNAME | SYMOPT_LOAD_LINES);
+    CHECK(SymInitialize(GetCurrentProcess(), NULL, TRUE));
+    // FIXME: We don't call SymCleanup() on exit yet - should we?
+    dbghelp_initialized = true;
+  }
+
+  // See http://msdn.microsoft.com/en-us/library/ms680578(VS.85).aspx
+  char buffer[sizeof(SYMBOL_INFO) + MAX_SYM_NAME * sizeof(CHAR)];
+  PSYMBOL_INFO symbol = (PSYMBOL_INFO)buffer;
+  symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
+  symbol->MaxNameLen = MAX_SYM_NAME;
+  DWORD64 offset = 0;
+  BOOL got_objname = SymFromAddr(GetCurrentProcess(),
+                                 (DWORD64)addr, &offset, symbol);
+  if (!got_objname)
+    return false;
+
+  DWORD  unused;
+  IMAGEHLP_LINE64 info;
+  info.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
+  BOOL got_fileline = SymGetLineFromAddr64(GetCurrentProcess(),
+                                           (DWORD64)addr, &unused, &info);
+  int written = 0;
+  out_buffer[0] = '\0';
+  // FIXME: it might be useful to print out 'obj' or 'obj+offset' info too.
+  if (got_fileline) {
+    written += SNPrintf(out_buffer + written, buffer_size - written,
+                        " %s %s:%d", symbol->Name,
+                        info.FileName, info.LineNumber);
+  } else {
+    // 'offset' is 64-bit; narrow it so that %p gets a pointer-sized argument.
+    written += SNPrintf(out_buffer + written, buffer_size - written,
+                        " %s+0x%p", symbol->Name, (void*)(uintptr_t)offset);
+  }
+  return true;
+}
+
+// ---------------------- AsanLock ---------------- {{{1
+enum LockState {
+  LOCK_UNINITIALIZED = 0,
+  LOCK_READY = -1,
+};
+
+AsanLock::AsanLock(LinkerInitialized li) {
+  // FIXME: see comments in AsanLock::Lock() for the details.
+  CHECK(li == LINKER_INITIALIZED || owner_ == LOCK_UNINITIALIZED);
+
+  CHECK(sizeof(CRITICAL_SECTION) <= sizeof(opaque_storage_));
+  InitializeCriticalSection((LPCRITICAL_SECTION)opaque_storage_);
+  owner_ = LOCK_READY;
+}
+
+void AsanLock::Lock() {
+  if (owner_ == LOCK_UNINITIALIZED) {
+    // FIXME: hm, global AsanLock objects are not initialized?!?
+    // This might be a side effect of the clang+cl+link Frankenbuild...
+    new(this) AsanLock((LinkerInitialized)(LINKER_INITIALIZED + 1));
+
+    // FIXME: If it turns out the linker doesn't invoke our
+    // constructors, we should probably manually Lock/Unlock all the global
+    // locks while we're starting in one thread to avoid double-init races.
+  }
+  EnterCriticalSection((LPCRITICAL_SECTION)opaque_storage_);
+  CHECK(owner_ == LOCK_READY);
+  owner_ = GetThreadSelf();
+}
+
+void AsanLock::Unlock() {
+  CHECK(owner_ == GetThreadSelf());
+  owner_ = LOCK_READY;
+  LeaveCriticalSection((LPCRITICAL_SECTION)opaque_storage_);
+}
+
+// ---------------------- TSD ---------------- {{{1
+static bool tsd_key_inited = false;
+
+static __declspec(thread) void *fake_tsd = NULL;
+
+void AsanTSDInit(void (*destructor)(void *tsd)) {
+  // FIXME: we're ignoring the destructor for now.
+  tsd_key_inited = true;
+}
+
+void *AsanTSDGet() {
+  CHECK(tsd_key_inited);
+  return fake_tsd;
+}
+
+void AsanTSDSet(void *tsd) {
+  CHECK(tsd_key_inited);
+  fake_tsd = tsd;
+}
+
+// ---------------------- Various stuff ---------------- {{{1
+void *AsanDoesNotSupportStaticLinkage() {
+#if defined(_DEBUG)
+#error Please build the runtime with a non-debug CRT: /MD or /MT
+#endif
+  return NULL;
+}
+
+bool AsanShadowRangeIsAvailable() {
+  // FIXME: shall we do anything here on Windows?
+  return true;
+}
+
+int AtomicInc(int *a) {
+  return InterlockedExchangeAdd((LONG*)a, 1) + 1;
+}
+
+uint16_t AtomicExchange(uint16_t *a, uint16_t new_val) {
+  // InterlockedExchange16 seems unavailable on some MSVS installations.
+  // Everybody stand back, I pretend to know inline assembly!
+  // FIXME: I assume VC is smart enough to save/restore eax/ecx?
+  __asm {
+    mov eax, a
+    mov cx, new_val
+    xchg [eax], cx  ; NOLINT
+    mov new_val, cx
+  }
+  return new_val;
+}
+
+const char* AsanGetEnv(const char* name) {
+  static char env_buffer[32767] = {};
+
+  // Note: this implementation stores the result in a static buffer so we only
+  // allow it to be called just once.
+  static bool called_once = false;
+  if (called_once)
+    UNIMPLEMENTED();
+  called_once = true;
+
+  DWORD rv = GetEnvironmentVariableA(name, env_buffer, sizeof(env_buffer));
+  if (rv > 0 && rv < sizeof(env_buffer))
+    return env_buffer;
+  return NULL;
+}
+
+void AsanDumpProcessMap() {
+  UNIMPLEMENTED();
+}
+
+int GetPid() {
+  return GetProcessId(GetCurrentProcess());
+}
+
+uintptr_t GetThreadSelf() {
+  return GetCurrentThreadId();
+}
+
+void SetAlternateSignalStack() {
+  // FIXME: Decide what to do on Windows.
+}
+
+void UnsetAlternateSignalStack() {
+  // FIXME: Decide what to do on Windows.
+}
+
+void InstallSignalHandlers() {
+  // FIXME: Decide what to do on Windows.
+}
+
+void AsanDisableCoreDumper() {
+  UNIMPLEMENTED();
+}
+
+void SleepForSeconds(int seconds) {
+  Sleep(seconds * 1000);
+}
+
+void Exit(int exitcode) {
+  _exit(exitcode);
+}
+
+void Abort() {
+  abort();
+}
+
+int Atexit(void (*function)(void)) {
+  return atexit(function);
+}
+
+void SortArray(uintptr_t *array, size_t size) {
+  std::sort(array, array + size);
+}
+
+}  // namespace __asan
+
+#endif  // _WIN32
diff --git a/lib/asan/interception/Makefile.mk b/lib/asan/interception/Makefile.mk
new file mode 100644
index 0000000..3fb7778
--- /dev/null
+++ b/lib/asan/interception/Makefile.mk
@@ -0,0 +1,23 @@
+#===- lib/asan/interception/Makefile.mk --------------------*- Makefile -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := asan
+SubDirs := mach_override
+
+Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
+ObjNames := $(Sources:%.cc=%.o)
+
+Implementation := Generic
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard $(Dir)/*.h)
+Dependencies += $(wildcard $(Dir)/mach_override/*.h)
+
+# Define a convenience variable for all the asan functions.
+AsanFunctions += $(Sources:%.cc=%)
diff --git a/lib/asan/interception/interception.h b/lib/asan/interception/interception.h
new file mode 100644
index 0000000..fb3eef0
--- /dev/null
+++ b/lib/asan/interception/interception.h
@@ -0,0 +1,157 @@
+//===-- interception.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Machinery for providing replacements/wrappers for system functions.
+//===----------------------------------------------------------------------===//
+
+#ifndef INTERCEPTION_H
+#define INTERCEPTION_H
+
+#if !defined(__linux__) && !defined(__APPLE__) && !defined(_WIN32)
+# error "Interception doesn't work on this operating system."
+#endif
+
+// How to use this library:
+//      1) Include this header to define your own interceptors
+//         (see details below).
+//      2) Build all *.cc files and link against them.
+// On Mac you will also need to:
+//      3) Provide your own implementation for the following functions:
+//           mach_error_t __interception::allocate_island(void **ptr,
+//                                                      size_t size,
+//                                                      void *hint);
+//           mach_error_t __interception::deallocate_island(void *ptr);
+//         See "interception_mac.h" for more details.
+
+// How to add an interceptor:
+// Suppose you need to wrap/replace system function (generally, from libc):
+//      int foo(const char *bar, double baz);
+// You'll need to:
+//      1) define INTERCEPTOR(int, foo, const char *bar, double baz) { ... } in
+//         your source file.
+//      2) Call "INTERCEPT_FUNCTION(foo)" prior to the first call of "foo".
+//         INTERCEPT_FUNCTION(foo) evaluates to "true" iff the function was
+//         intercepted successfully.
+// You can access original function by calling REAL(foo)(bar, baz).
+// By default, REAL(foo) will be visible only inside your interceptor, and if
+// you want to use it in other parts of RTL, you'll need to:
+//      3a) add DECLARE_REAL(int, foo, const char*, double); to a
+//          header file.
+// However, if the call "INTERCEPT_FUNCTION(foo)" and definition for
+// INTERCEPTOR(..., foo, ...) are in different files, you'll instead need to:
+//      3b) add DECLARE_REAL_AND_INTERCEPTOR(int, foo, const char*, double);
+//          to a header file.
+
+// Notes: 1. Things may not work properly if macro INTERCEPTOR(...) {...} or
+//           DECLARE_REAL(...); are located inside namespaces.
+//        2. On Mac you can also use: "OVERRIDE_FUNCTION(foo, zoo);" to
+//           effectively redirect calls from "foo" to "zoo". In this case
+//           you aren't required to implement
+//           INTERCEPTOR(int, foo, const char *bar, double baz);
+//           but instead you'll have to add
+//           DEFINE_REAL(int, foo, const char *bar, double baz); in your
+//           source file (to define a pointer to overridden function).
+
+// How it works:
+// To replace weak system functions on Linux we just need to declare functions
+// with same names in our library and then obtain the real function pointers
+// using dlsym(). This is not so on Mac OS, where the two-level namespace makes
+// our replacement functions invisible to other libraries. This may be overcome
+// using the DYLD_FORCE_FLAT_NAMESPACE, but some errors loading the shared
+// libraries in Chromium were noticed when doing so.
+// Instead we use mach_override, a handy framework for patching functions at
+// runtime. To avoid possible name clashes, our replacement functions have
+// the "wrap_" prefix on Mac.
+
+#if defined(__APPLE__)
+# define WRAP(x) wrap_##x
+# define WRAPPER_NAME(x) "wrap_"#x
+# define INTERCEPTOR_ATTRIBUTE
+#elif defined(_WIN32)
+# if defined(_DLL)  // DLL CRT
+#  define WRAP(x) x
+#  define WRAPPER_NAME(x) #x
+#  define INTERCEPTOR_ATTRIBUTE
+# else  // Static CRT
+#  define WRAP(x) wrap_##x
+#  define WRAPPER_NAME(x) "wrap_"#x
+#  define INTERCEPTOR_ATTRIBUTE
+# endif
+#else
+# define WRAP(x) x
+# define WRAPPER_NAME(x) #x
+# define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default")))
+#endif
+
+#define PTR_TO_REAL(x) real_##x
+#define REAL(x) __interception::PTR_TO_REAL(x)
+#define FUNC_TYPE(x) x##_f
+
+#define DECLARE_REAL(ret_type, func, ...); \
+  typedef ret_type (*FUNC_TYPE(func))(__VA_ARGS__); \
+  namespace __interception { \
+    extern FUNC_TYPE(func) PTR_TO_REAL(func); \
+  }
+
+#define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...); \
+  DECLARE_REAL(ret_type, func, ##__VA_ARGS__); \
+  extern "C" ret_type WRAP(func)(__VA_ARGS__);
+
+// FIXME(timurrrr): We might need to add DECLARE_REAL_EX etc to support
+// different calling conventions later.
+
+#define DEFINE_REAL_EX(ret_type, convention, func, ...); \
+  typedef ret_type (convention *FUNC_TYPE(func))(__VA_ARGS__); \
+  namespace __interception { \
+    FUNC_TYPE(func) PTR_TO_REAL(func); \
+  }
+
+// Generally, you don't need to use DEFINE_REAL by itself, as the INTERCEPTOR
+// macro does its job. In exceptional cases you may need to call REAL(foo)
+// without defining INTERCEPTOR(..., foo, ...). For example, if you override
+// foo with an interceptor for another function.
+#define DEFAULT_CONVENTION
+
+#define DEFINE_REAL(ret_type, func, ...); \
+  DEFINE_REAL_EX(ret_type, DEFAULT_CONVENTION, func, __VA_ARGS__);
+
+#define INTERCEPTOR_EX(ret_type, convention, func, ...) \
+  DEFINE_REAL_EX(ret_type, convention, func, __VA_ARGS__); \
+  extern "C" \
+  INTERCEPTOR_ATTRIBUTE \
+  ret_type convention WRAP(func)(__VA_ARGS__)
+
+#define INTERCEPTOR(ret_type, func, ...) \
+  INTERCEPTOR_EX(ret_type, DEFAULT_CONVENTION, func, __VA_ARGS__)
+
+#if defined(_WIN32)
+# define INTERCEPTOR_WINAPI(ret_type, func, ...) \
+  INTERCEPTOR_EX(ret_type, __stdcall, func, __VA_ARGS__)
+#endif
+
+#define INCLUDED_FROM_INTERCEPTION_LIB
+
+#if defined(__linux__)
+# include "interception_linux.h"
+# define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_LINUX(func)
+#elif defined(__APPLE__)
+# include "interception_mac.h"
+# define OVERRIDE_FUNCTION(old_func, new_func) \
+    OVERRIDE_FUNCTION_MAC(old_func, new_func)
+# define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_MAC(func)
+#else  // defined(_WIN32)
+# include "interception_win.h"
+# define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_WIN(func)
+#endif
+
+#undef INCLUDED_FROM_INTERCEPTION_LIB
+
+#endif  // INTERCEPTION_H
diff --git a/lib/asan/interception/interception_linux.cc b/lib/asan/interception/interception_linux.cc
new file mode 100644
index 0000000..74fcf56
--- /dev/null
+++ b/lib/asan/interception/interception_linux.cc
@@ -0,0 +1,28 @@
+//===-- interception_linux.cc -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Linux-specific interception methods.
+//===----------------------------------------------------------------------===//
+
+#ifdef __linux__
+
+#include <stddef.h>  // for NULL
+#include <dlfcn.h>   // for dlsym
+
+namespace __interception {
+bool GetRealFunctionAddress(const char *func_name, void **func_addr) {
+  *func_addr = dlsym(RTLD_NEXT, func_name);
+  return (*func_addr != NULL);
+}
+}  // namespace __interception
+
+
+#endif  // __linux__
diff --git a/lib/asan/interception/interception_linux.h b/lib/asan/interception/interception_linux.h
new file mode 100644
index 0000000..102b3c1
--- /dev/null
+++ b/lib/asan/interception/interception_linux.h
@@ -0,0 +1,33 @@
+//===-- interception_linux.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Linux-specific interception methods.
+//===----------------------------------------------------------------------===//
+
+#ifdef __linux__
+
+#if !defined(INCLUDED_FROM_INTERCEPTION_LIB)
+# error "interception_linux.h should be included from interception library only"
+#endif
+
+#ifndef INTERCEPTION_LINUX_H
+#define INTERCEPTION_LINUX_H
+
+namespace __interception {
+// returns true if a function with the given name was found.
+bool GetRealFunctionAddress(const char *func_name, void **func_addr);
+}  // namespace __interception
+
+#define INTERCEPT_FUNCTION_LINUX(func) \
+    ::__interception::GetRealFunctionAddress(#func, (void**)&REAL(func))
+
+#endif  // INTERCEPTION_LINUX_H
+#endif  // __linux__
diff --git a/lib/asan/interception/interception_mac.cc b/lib/asan/interception/interception_mac.cc
new file mode 100644
index 0000000..cc9e4a7
--- /dev/null
+++ b/lib/asan/interception/interception_mac.cc
@@ -0,0 +1,33 @@
+//===-- interception_mac.cc -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Mac-specific interception methods.
+//===----------------------------------------------------------------------===//
+
+#ifdef __APPLE__
+
+#define INCLUDED_FROM_INTERCEPTION_LIB
+#include "interception_mac.h"
+#undef INCLUDED_FROM_INTERCEPTION_LIB
+#include "mach_override/mach_override.h"
+
+namespace __interception {
+bool OverrideFunction(void *old_func, void *new_func, void **orig_old_func) {
+  *orig_old_func = NULL;
+  int res = __asan_mach_override_ptr_custom(old_func, new_func,
+                                            orig_old_func,
+                                            __interception_allocate_island,
+                                            __interception_deallocate_island);
+  return (res == 0) && (*orig_old_func != NULL);
+}
+}  // namespace __interception
+
+#endif  // __APPLE__
diff --git a/lib/asan/interception/interception_mac.h b/lib/asan/interception/interception_mac.h
new file mode 100644
index 0000000..224d961
--- /dev/null
+++ b/lib/asan/interception/interception_mac.h
@@ -0,0 +1,47 @@
+//===-- interception_mac.h --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Mac-specific interception methods.
+//===----------------------------------------------------------------------===//
+
+#ifdef __APPLE__
+
+#if !defined(INCLUDED_FROM_INTERCEPTION_LIB)
+# error "interception_mac.h should be included from interception.h only"
+#endif
+
+#ifndef INTERCEPTION_MAC_H
+#define INTERCEPTION_MAC_H
+
+#include <mach/mach_error.h>
+#include <stddef.h>
+
+// Allocate memory for the escape island. This cannot be moved to
+// mach_override, because each user of interceptors may specify its
+// own memory range for escape islands.
+extern "C" {
+mach_error_t __interception_allocate_island(void **ptr, size_t unused_size,
+                                            void *unused_hint);
+mach_error_t __interception_deallocate_island(void *ptr);
+}  // extern "C"
+
+namespace __interception {
+// returns true if the old function existed.
+bool OverrideFunction(void *old_func, void *new_func, void **orig_old_func);
+}  // namespace __interception
+
+# define OVERRIDE_FUNCTION_MAC(old_func, new_func) \
+    ::__interception::OverrideFunction((void*)old_func, (void*)new_func, \
+                                       (void**)&REAL(old_func))
+# define INTERCEPT_FUNCTION_MAC(func) OVERRIDE_FUNCTION_MAC(func, WRAP(func))
+
+#endif  // INTERCEPTION_MAC_H
+#endif  // __APPLE__
diff --git a/lib/asan/interception/interception_win.cc b/lib/asan/interception/interception_win.cc
new file mode 100644
index 0000000..a60c741
--- /dev/null
+++ b/lib/asan/interception/interception_win.cc
@@ -0,0 +1,149 @@
+//===-- interception_win.cc -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Windows-specific interception methods.
+//===----------------------------------------------------------------------===//
+
+#ifdef _WIN32
+
+#include <windows.h>
+
+namespace __interception {
+
+bool GetRealFunctionAddress(const char *func_name, void **func_addr) {
+  const char *DLLS[] = {
+    "msvcr80.dll",
+    "msvcr90.dll",
+    "kernel32.dll",
+    NULL
+  };
+  *func_addr = NULL;
+  for (size_t i = 0; *func_addr == NULL && DLLS[i]; ++i) {
+    *func_addr = GetProcAddress(GetModuleHandleA(DLLS[i]), func_name);
+  }
+  return (*func_addr != NULL);
+}
+
+// FIXME: internal_str* and internal_mem* functions should be moved from the
+// ASan sources into interception/.
+
+static void _memset(void *p, int value, size_t sz) {
+  for (size_t i = 0; i < sz; ++i)
+    ((char*)p)[i] = (char)value;
+}
+
+static void _memcpy(void *dst, void *src, size_t sz) {
+  char *dst_c = (char*)dst,
+       *src_c = (char*)src;
+  for (size_t i = 0; i < sz; ++i)
+    dst_c[i] = src_c[i];
+}
+
+static void WriteJumpInstruction(char *jmp_from, char *to) {
+  // jmp XXYYZZWW = E9 WW ZZ YY XX, where XXYYZZWW is the offset from the
+  // instruction following the jmp (jmp_from + 5) to the destination 'to'.
+  ptrdiff_t offset = to - jmp_from - 5;
+  *jmp_from = '\xE9';
+  *(ptrdiff_t*)(jmp_from + 1) = offset;
+}
+
+bool OverrideFunction(void *old_func, void *new_func, void **orig_old_func) {
+#ifdef _WIN64
+# error OverrideFunction was not tested on x64
+#endif
+  // Basic idea:
+  // We write 5 bytes (jmp-to-new_func) at the beginning of the 'old_func'
+  // to override it. We want to be able to execute the original 'old_func' from
+  // the wrapper, so we need to keep the leading 5+ bytes ('head') of the
+  // original instructions somewhere with a "jmp old_func+head".
+  // We call these 'head'+5 bytes of instructions a "trampoline".
+  // On success returns true and stores the trampoline in '*orig_old_func'.
+  // Trampolines are allocated from a common pool.
+  const int POOL_SIZE = 1024;
+  static char *pool = NULL;
+  static size_t pool_used = 0;
+  if (pool == NULL) {
+    pool = (char*)VirtualAlloc(NULL, POOL_SIZE,
+                               MEM_RESERVE | MEM_COMMIT,
+                               PAGE_EXECUTE_READWRITE);
+    // FIXME: set PAGE_EXECUTE_READ access after setting all interceptors?
+    if (pool == NULL)
+      return false;
+    _memset(pool, 0xCC /* int 3 */, POOL_SIZE);
+  }
+
+  char* old_bytes = (char*)old_func;
+  char* trampoline = pool + pool_used;
+
+  // Find out the number of bytes of the instructions we need to copy to the
+  // trampoline and store it in 'head'. Only whole instructions are counted.
+  size_t head = 0;
+  while (head < 5) {
+    switch (old_bytes[head]) {
+      case '\x55':  // push ebp
+      case '\x56':  // push esi
+      case '\x57':  // push edi
+        head++;
+        continue;
+    }
+    switch (*(unsigned short*)(old_bytes + head)) {  // NOLINT
+      case 0xFF8B:  // 8B FF = mov edi, edi
+      case 0xEC8B:  // 8B EC = mov ebp, esp
+      case 0xC033:  // 33 C0 = xor eax, eax
+        head += 2;
+        continue;
+      case 0xEC83:  // 83 EC XX = sub esp, XX
+        head += 3;
+        continue;
+      case 0xC1F7:  // F7 C1 XX YY ZZ WW = test ecx, WWZZYYXX
+        head += 6;
+        continue;
+    }
+    switch (0x00FFFFFF & *(unsigned int*)(old_bytes + head)) {
+      case 0x24448A:  // 8A 44 24 XX = mov al, byte ptr [esp+XXh]
+      case 0x244C8B:  // 8B 4C 24 XX = mov ecx, dword ptr [esp+XXh]
+      case 0x24548B:  // 8B 54 24 XX = mov edx, dword ptr [esp+XXh]
+      case 0x247C8B:  // 8B 7C 24 XX = mov edi, dword ptr [esp+XXh]
+        head += 4;
+        continue;
+    }
+
+    // Unknown instruction: give up rather than guess its length.
+    return false;
+  }
+  // The trampoline ('head' bytes + a 5-byte jmp) must fit into the pool.
+  if (pool_used + head + 5 > POOL_SIZE)
+    return false;
+
+  // Make the first 'head' bytes of 'old_func' writable so they can be patched.
+  DWORD old_prot, unused_prot;
+  if (!VirtualProtect(old_func, head, PAGE_EXECUTE_READWRITE, &old_prot))
+    return false;
+
+  // Put the needed instructions into the trampoline bytes.
+  _memcpy(trampoline, old_bytes, head);
+  WriteJumpInstruction(trampoline + head, old_bytes + head);
+  *orig_old_func = trampoline;
+  pool_used += head + 5;
+
+  // Intercept 'old_func': jmp to 'new_func', then int 3 padding up to 'head'.
+  WriteJumpInstruction(old_bytes, (char*)new_func);
+  _memset(old_bytes + 5, 0xCC /* int 3 */, head - 5);
+
+  if (!VirtualProtect(old_func, head, old_prot, &unused_prot))
+    return false;  // not clear if this failure bothers us.
+
+  return true;
+}
+
+}  // namespace __interception
+
+#endif  // _WIN32
diff --git a/lib/asan/interception/interception_win.h b/lib/asan/interception/interception_win.h
new file mode 100644
index 0000000..9d1586e
--- /dev/null
+++ b/lib/asan/interception/interception_win.h
@@ -0,0 +1,42 @@
+//===-- interception_win.h --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Windows-specific interception methods.
+//===----------------------------------------------------------------------===//
+
+#ifdef _WIN32
+
+#if !defined(INCLUDED_FROM_INTERCEPTION_LIB)
+# error "interception_win.h should be included from interception library only"
+#endif
+
+#ifndef INTERCEPTION_WIN_H
+#define INTERCEPTION_WIN_H
+
+namespace __interception {
+// returns true if a function with the given name was found.
+bool GetRealFunctionAddress(const char *func_name, void **func_addr);
+
+// returns true if the old function existed, false on failure.
+bool OverrideFunction(void *old_func, void *new_func, void **orig_old_func);
+}  // namespace __interception
+
+#if defined(_DLL)
+# define INTERCEPT_FUNCTION_WIN(func) \
+    ::__interception::GetRealFunctionAddress(#func, (void**)&REAL(func))
+#else
+# define INTERCEPT_FUNCTION_WIN(func) \
+    ::__interception::OverrideFunction((void*)func, (void*)WRAP(func), \
+                                       (void**)&REAL(func))
+#endif
+
+#endif  // INTERCEPTION_WIN_H
+#endif  // _WIN32
diff --git a/lib/asan/interception/mach_override/LICENSE.TXT b/lib/asan/interception/mach_override/LICENSE.TXT
new file mode 100644
index 0000000..9446965
--- /dev/null
+++ b/lib/asan/interception/mach_override/LICENSE.TXT
@@ -0,0 +1,3 @@
+Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com>
+Some rights reserved: <http://opensource.org/licenses/mit-license.php>
+
diff --git a/lib/asan/interception/mach_override/Makefile.mk b/lib/asan/interception/mach_override/Makefile.mk
new file mode 100644
index 0000000..78be0b3
--- /dev/null
+++ b/lib/asan/interception/mach_override/Makefile.mk
@@ -0,0 +1,22 @@
+#===- lib/asan/interception/mach_override/Makefile.mk ------*- Makefile -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := asan
+SubDirs :=
+
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o)
+
+Implementation := Generic
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard $(Dir)/*.h)
+
+# Define a convenience variable for all the asan functions.
+AsanFunctions += $(Sources:%.c=%)
diff --git a/lib/asan/interception/mach_override/README.txt b/lib/asan/interception/mach_override/README.txt
new file mode 100644
index 0000000..5f62ad7
--- /dev/null
+++ b/lib/asan/interception/mach_override/README.txt
@@ -0,0 +1,9 @@
+-- mach_override.c is taken from upstream version at
+ https://github.com/rentzsch/mach_star/tree/f8e0c424b5be5cb641ded67c265e616157ae4bcf
+-- Added debugging code under DEBUG_DISASM.
+-- The files are guarded with #ifdef __APPLE__
+-- some opcodes are added in order to parse the library functions on Lion
+-- fixupInstructions() is extended to relocate relative calls, not only jumps
+-- mach_override_ptr is renamed to __asan_mach_override_ptr and
+ other functions are marked as hidden.
+
diff --git a/lib/asan/interception/mach_override/mach_override.c b/lib/asan/interception/mach_override/mach_override.c
new file mode 100644
index 0000000..399f82d
--- /dev/null
+++ b/lib/asan/interception/mach_override/mach_override.c
@@ -0,0 +1,957 @@
+/*******************************************************************************
+	mach_override.c
+		Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com>
+		Some rights reserved: <http://opensource.org/licenses/mit-license.php>
+
+	***************************************************************************/
+#ifdef __APPLE__
+
+#include "mach_override.h"
+
+#include <mach-o/dyld.h>
+#include <mach/mach_host.h>
+#include <mach/mach_init.h>
+#include <mach/vm_map.h>
+#include <sys/mman.h>
+
+#include <CoreServices/CoreServices.h>
+
+//#define DEBUG_DISASM 1
+#undef DEBUG_DISASM
+
+/**************************
+*	
+*	Constants
+*	
+**************************/
+#pragma mark	-
+#pragma mark	(Constants)
+
+#if defined(__ppc__) || defined(__POWERPC__)
+
+long kIslandTemplate[] = {
+	0x9001FFFC,	//	stw		r0,-4(SP)
+	0x3C00DEAD,	//	lis		r0,0xDEAD
+	0x6000BEEF,	//	ori		r0,r0,0xBEEF
+	0x7C0903A6,	//	mtctr	r0
+	0x8001FFFC,	//	lwz		r0,-4(SP)
+	0x60000000,	//	nop		; optionally replaced
+	0x4E800420 	//	bctr
+};
+
+#define kAddressHi			3
+#define kAddressLo			5
+#define kInstructionHi		10
+#define kInstructionLo		11
+
+#elif defined(__i386__) 
+
+#define kOriginalInstructionsSize 16
+
+char kIslandTemplate[] = {
+	// kOriginalInstructionsSize nop bytes: room for the original
+	// instructions that get copied to the start of the island.
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+	// jmp rel32 (0xE9); its placeholder operand lives at kJumpAddress.
+	0xE9, 0xEF, 0xBE, 0xAD, 0xDE
+};
+
+#define kInstructions	0
+#define kJumpAddress    (kInstructions + kOriginalInstructionsSize + 1)
+#elif defined(__x86_64__)
+
+#define kOriginalInstructionsSize 32
+
+#define kJumpAddress    (kOriginalInstructionsSize + 6)
+
+char kIslandTemplate[] = {
+	// kOriginalInstructionsSize nop bytes: room for the original
+	// instructions that get copied to the start of the island.
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+	// jmp *0(%rip), followed by the 8-byte absolute target at kJumpAddress.
+	0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00
+};
+
+#endif
+
+#define	kAllocateHigh		1
+#define	kAllocateNormal		0
+
+/**************************
+*	
+*	Data Types
+*	
+**************************/
+#pragma mark	-
+#pragma mark	(Data Types)
+
+typedef	struct	{
+	char	instructions[sizeof(kIslandTemplate)];
+	int		allocatedHigh;
+}	BranchIsland;
+
+/**************************
+*	
+*	Funky Protos
+*	
+**************************/
+#pragma mark	-
+#pragma mark	(Funky Protos)
+
+	mach_error_t
+allocateBranchIsland(
+		BranchIsland	**island,
+		int				allocateHigh,
+		void *originalFunctionAddress) __attribute__((visibility("hidden")));
+
+	mach_error_t
+freeBranchIsland(
+		BranchIsland	*island ) __attribute__((visibility("hidden")));
+
+	mach_error_t
+defaultIslandMalloc(
+	  void **ptr, size_t unused_size, void *hint) __attribute__((visibility("hidden")));
+
+	mach_error_t
+defaultIslandFree(
+   	void *ptr) __attribute__((visibility("hidden")));
+
+#if defined(__ppc__) || defined(__POWERPC__)
+	mach_error_t
+setBranchIslandTarget(
+		BranchIsland	*island,
+		const void		*branchTo,
+		long			instruction ) __attribute__((visibility("hidden")));
+#endif 
+
+#if defined(__i386__) || defined(__x86_64__)
+mach_error_t
+setBranchIslandTarget_i386(
+						   BranchIsland	*island,
+						   const void		*branchTo,
+						   char*			instructions ) __attribute__((visibility("hidden")));
+void 
+atomic_mov64(
+		uint64_t *targetAddress,
+		uint64_t value ) __attribute__((visibility("hidden")));
+
+	static Boolean 
+eatKnownInstructions( 
+	unsigned char	*code, 
+	uint64_t		*newInstruction,
+	int				*howManyEaten, 
+	char			*originalInstructions,
+	int				*originalInstructionCount, 
+	uint8_t			*originalInstructionSizes ) __attribute__((visibility("hidden")));
+
+	static void
+fixupInstructions(
+    void		*originalFunction,
+    void		*escapeIsland,
+    void		*instructionsToFix,
+	int			instructionCount,
+	uint8_t		*instructionSizes ) __attribute__((visibility("hidden")));
+
+#ifdef DEBUG_DISASM
+	static void
+dump16Bytes(
+	void	*ptr);
+#endif  // DEBUG_DISASM
+#endif
+
+/*******************************************************************************
+*	
+*	Interface
+*	
+*******************************************************************************/
+#pragma mark	-
+#pragma mark	(Interface)
+
+#if defined(__i386__) || defined(__x86_64__)
+mach_error_t makeIslandExecutable(void *address) {
+	// Makes the page holding |address| RWX; err_cannot_override on any failure.
+	vm_size_t pageSize;
+	// Without a valid page size the mask below would use an unset value.
+	if (host_page_size( mach_host_self(), &pageSize ) != KERN_SUCCESS)
+		return err_cannot_override;
+	uintptr_t page = (uintptr_t)address & ~(uintptr_t)(pageSize-1);
+	if (mprotect((void *)page, pageSize, PROT_EXEC | PROT_READ | PROT_WRITE) != 0)
+		return err_cannot_override;
+	if (msync((void *)page, pageSize, MS_INVALIDATE ) != 0)
+		return err_cannot_override;
+	return err_none;
+}
+#endif
+
+		mach_error_t
+defaultIslandMalloc(
+	void **ptr, size_t unused_size, void *hint) {
+  return allocateBranchIsland( (BranchIsland**)ptr, kAllocateHigh, hint );
+}
+		mach_error_t
+defaultIslandFree(
+	void *ptr) {
+	return freeBranchIsland(ptr);
+}
+
+    mach_error_t
+__asan_mach_override_ptr(
+	void *originalFunctionAddress,
+    const void *overrideFunctionAddress,
+    void **originalFunctionReentryIsland )
+{
+  return __asan_mach_override_ptr_custom(originalFunctionAddress,
+		overrideFunctionAddress,
+		originalFunctionReentryIsland,
+		defaultIslandMalloc,
+		defaultIslandFree);
+}
+
+    mach_error_t
+__asan_mach_override_ptr_custom(
+	void *originalFunctionAddress,
+    const void *overrideFunctionAddress,
+    void **originalFunctionReentryIsland,
+		island_malloc *alloc,
+		island_free *dealloc)
+{
+	assert( originalFunctionAddress );
+	assert( overrideFunctionAddress );
+	// Patches the start of the original function with a jump to the override,
+	// via an "escape island". Follow leading "jmp *addr" stubs first; this
+	// addresses overriding such functions as AudioOutputUnitStart().
+#if defined(__x86_64__)
+    for(;;){
+        if(*(uint16_t*)originalFunctionAddress==0x25FF)    // jmp qword near [rip+0x????????]
+            originalFunctionAddress=*(void**)((char*)originalFunctionAddress+6+*(int32_t *)((uint16_t*)originalFunctionAddress+1));
+        else break;
+    }
+#elif defined(__i386__)
+    for(;;){
+        if(*(uint16_t*)originalFunctionAddress==0x25FF)    // jmp *0x????????
+            originalFunctionAddress=**(void***)((uint16_t*)originalFunctionAddress+1);
+        else break;
+    }
+#endif
+#ifdef DEBUG_DISASM
+  {
+    fprintf(stderr, "Replacing function at %p\n", originalFunctionAddress);
+    fprintf(stderr, "First 16 bytes of the function: ");
+    unsigned char *orig = (unsigned char *)originalFunctionAddress;
+    int i;
+    for (i = 0; i < 16; i++) {
+       fprintf(stderr, "%x ", (unsigned int) orig[i]);
+    }
+    fprintf(stderr, "\n");
+    fprintf(stderr, 
+            "To disassemble, save the following function as disas.c"
+            " and run:\n  gcc -c disas.c && gobjdump -d disas.o\n"
+            "The first 16 bytes of the original function will start"
+            " after four nop instructions.\n");
+    fprintf(stderr, "\nvoid foo() {\n  asm volatile(\"nop;nop;nop;nop;\");\n");
+    int j = 0;
+    for (j = 0; j < 2; j++) {
+      fprintf(stderr, "  asm volatile(\".byte ");
+      for (i = 8 * j; i < 8 * (j+1) - 1; i++) {
+        fprintf(stderr, "0x%x, ", (unsigned int) orig[i]);
+      }
+      fprintf(stderr, "0x%x;\");\n", (unsigned int) orig[8 * (j+1) - 1]);
+    }
+    fprintf(stderr, "}\n\n");
+  }
+#endif
+
+	long	*originalFunctionPtr = (long*) originalFunctionAddress;
+	mach_error_t	err = err_none;
+	
+#if defined(__ppc__) || defined(__POWERPC__)
+	//	Ensure first instruction isn't 'mfctr'.
+	#define	kMFCTRMask			0xfc1fffff
+	#define	kMFCTRInstruction	0x7c0903a6
+	
+	long	originalInstruction = *originalFunctionPtr;
+	if( !err && ((originalInstruction & kMFCTRMask) == kMFCTRInstruction) )
+		err = err_cannot_override;
+#elif defined(__i386__) || defined(__x86_64__)
+	int eatenCount = 0;
+	int originalInstructionCount = 0;
+	char originalInstructions[kOriginalInstructionsSize];
+	uint8_t originalInstructionSizes[kOriginalInstructionsSize];
+	uint64_t jumpRelativeInstruction = 0; // JMP
+
+	Boolean overridePossible = eatKnownInstructions ((unsigned char *)originalFunctionPtr, 
+										&jumpRelativeInstruction, &eatenCount, 
+										originalInstructions, &originalInstructionCount, 
+										originalInstructionSizes );
+#ifdef DEBUG_DISASM
+  if (!overridePossible) fprintf(stderr, "overridePossible = false @%d\n", __LINE__);
+#endif
+	if (eatenCount > kOriginalInstructionsSize) {
+#ifdef DEBUG_DISASM
+		fprintf(stderr, "Too many instructions eaten\n");
+#endif    
+		overridePossible = false;
+	}
+	if (!overridePossible) err = err_cannot_override;
+	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
+#endif
+	
+	//	Make the original function implementation writable.
+	if( !err ) {
+		err = vm_protect( mach_task_self(),
+				(vm_address_t) originalFunctionPtr, 8, false,
+				(VM_PROT_ALL | VM_PROT_COPY) );
+		if( err )
+			err = vm_protect( mach_task_self(),
+					(vm_address_t) originalFunctionPtr, 8, false,
+					(VM_PROT_DEFAULT | VM_PROT_COPY) );
+	}
+	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
+	
+	//	Allocate and target the escape island to the overriding function.
+	BranchIsland	*escapeIsland = NULL;
+	if( !err )
+		err = alloc( (void**)&escapeIsland, sizeof(BranchIsland), originalFunctionAddress );
+	if ( err ) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
+	
+#if defined(__ppc__) || defined(__POWERPC__)
+	if( !err )
+		err = setBranchIslandTarget( escapeIsland, overrideFunctionAddress, 0 );
+	
+	//	Build the branch absolute instruction to the escape island.
+	long	branchAbsoluteInstruction = 0; // Set to 0 just to silence warning.
+	if( !err ) {
+		long escapeIslandAddress = ((long) escapeIsland) & 0x3FFFFFF;
+		branchAbsoluteInstruction = 0x48000002 | escapeIslandAddress;
+	}
+#elif defined(__i386__) || defined(__x86_64__)
+        if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
+
+	if( !err )
+		err = setBranchIslandTarget_i386( escapeIsland, overrideFunctionAddress, 0 );
+ 
+	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
+	// Build the jump relative instruction to the escape island
+#endif
+
+#if defined(__i386__) || defined(__x86_64__)
+	if (!err) {
+		uint32_t addressOffset = ((char*)escapeIsland - (char*)originalFunctionPtr - 5);
+		addressOffset = OSSwapInt32(addressOffset);
+		
+		jumpRelativeInstruction |= 0xE900000000000000LL; 
+		jumpRelativeInstruction |= ((uint64_t)addressOffset & 0xffffffff) << 24;
+		jumpRelativeInstruction = OSSwapInt64(jumpRelativeInstruction);		
+	}
+#endif
+	
+	//	Optionally allocate & return the reentry island. This may contain relocated
+	//  jmp instructions and so has all the same addressing reachability requirements
+	//  the escape island has to the original function, except the escape island is
+	//  technically our original function.
+	BranchIsland	*reentryIsland = NULL;
+	if( !err && originalFunctionReentryIsland ) {
+		err = alloc( (void**)&reentryIsland, sizeof(BranchIsland), escapeIsland);
+		if( !err )
+			*originalFunctionReentryIsland = reentryIsland;
+	}
+	
+#if defined(__ppc__) || defined(__POWERPC__)	
+	//	Atomically:
+	//	o If the reentry island was allocated:
+	//		o Insert the original instruction into the reentry island.
+	//		o Target the reentry island at the 2nd instruction of the
+	//		  original function.
+	//	o Replace the original instruction with the branch absolute.
+	if( !err ) {
+		int escapeIslandEngaged = false;
+		do {
+			if( reentryIsland )
+				err = setBranchIslandTarget( reentryIsland,
+						(void*) (originalFunctionPtr+1), originalInstruction );
+			if( !err ) {
+				escapeIslandEngaged = CompareAndSwap( originalInstruction,
+										branchAbsoluteInstruction,
+										(UInt32*)originalFunctionPtr );
+				if( !escapeIslandEngaged ) {
+					//	Someone replaced the instruction out from under us,
+					//	re-read the instruction, make sure it's still not
+					//	'mfctr' and try again.
+					originalInstruction = *originalFunctionPtr;
+					if( (originalInstruction & kMFCTRMask) == kMFCTRInstruction)
+						err = err_cannot_override;
+				}
+			}
+		} while( !err && !escapeIslandEngaged );
+	}
+#elif defined(__i386__) || defined(__x86_64__)
+	// Atomically:
+	//	o If the reentry island was allocated:
+	//		o Insert the original instructions into the reentry island.
+	//		o Target the reentry island at the first non-replaced 
+	//        instruction of the original function.
+	//	o Replace the original first instructions with the jump relative.
+	//
+	// Note that on i386, we do not support someone else changing the code under our feet
+	if ( !err ) {
+		fixupInstructions(originalFunctionPtr, reentryIsland, originalInstructions,
+					originalInstructionCount, originalInstructionSizes );
+	
+		if( reentryIsland )
+			err = setBranchIslandTarget_i386( reentryIsland,
+										 (void*) ((char *)originalFunctionPtr+eatenCount), originalInstructions );
+		// try making islands executable before planting the jmp
+        if( !err )
+            err = makeIslandExecutable(escapeIsland);
+        if( !err && reentryIsland )
+            err = makeIslandExecutable(reentryIsland);
+		if ( !err )
+			atomic_mov64((uint64_t *)originalFunctionPtr, jumpRelativeInstruction);
+	}
+#endif
+	
+	//	Clean up on error. The reentry island was already published through the
+	//	out-parameter above; clear it so the caller never sees a freed island.
+	if( err ) {
+		if( reentryIsland ) {
+			*originalFunctionReentryIsland = NULL;
+			dealloc( reentryIsland );
+		}
+		if( escapeIsland )
+			dealloc( escapeIsland );
+	}
+
+#ifdef DEBUG_DISASM
+  {
+    fprintf(stderr, "First 16 bytes of the function after slicing: ");
+    unsigned char *orig = (unsigned char *)originalFunctionAddress;
+    int i;
+    for (i = 0; i < 16; i++) {
+       fprintf(stderr, "%x ", (unsigned int) orig[i]);
+    }
+    fprintf(stderr, "\n");
+  }
+#endif
+	return err;
+}
+
+/*******************************************************************************
+*	
+*	Implementation
+*	
+*******************************************************************************/
+#pragma mark	-
+#pragma mark	(Implementation)
+
+/***************************************************************************//**
+	Implementation: Allocates memory for a branch island.
+	
+	@param	island			<-	The allocated island.
+	@param	allocateHigh	->	Whether to allocate the island at the end of the
+								address space (for use with the branch absolute
+								instruction).
+	@result					<-	mach_error_t
+
+	***************************************************************************/
+
+	mach_error_t
+allocateBranchIsland(
+		BranchIsland	**island,
+		int				allocateHigh,
+		void *originalFunctionAddress)
+{
+	assert( island );
+	
+	mach_error_t	err = err_none;
+	
+	if( allocateHigh ) {
+		vm_size_t pageSize;
+		err = host_page_size( mach_host_self(), &pageSize );
+		if( !err ) {
+			assert( sizeof( BranchIsland ) <= pageSize );
+#if defined(__ppc__) || defined(__POWERPC__)
+			vm_address_t first = 0xfeffffff;
+			vm_address_t last = 0xfe000000 + pageSize;
+#elif defined(__x86_64__)
+			vm_address_t first = ((uint64_t)originalFunctionAddress & ~(uint64_t)(((uint64_t)1 << 31) - 1)) | ((uint64_t)1 << 31); // start in the middle of the page?
+			vm_address_t last = 0x0;
+#else
+			vm_address_t first = 0xffc00000;
+			vm_address_t last = 0xfffe0000;
+#endif
+
+			vm_address_t page = first;
+			int allocated = 0;
+			vm_map_t task_self = mach_task_self();
+			
+			while( !err && !allocated && page != last ) {
+
+				err = vm_allocate( task_self, &page, pageSize, 0 );
+				if( err == err_none )
+					allocated = 1;
+				else if( err == KERN_NO_SPACE ) {
+#if defined(__x86_64__)
+					page -= pageSize;
+#else
+					page += pageSize;
+#endif
+					err = err_none;
+				}
+			}
+			if( allocated )
+				*island = (BranchIsland*) page;
+			else if( !allocated && !err )
+				err = KERN_NO_SPACE;
+		}
+	} else {
+		void *block = malloc( sizeof( BranchIsland ) );
+		if( block )
+			*island = block;
+		else
+			err = KERN_NO_SPACE;
+	}
+	if( !err )
+		(**island).allocatedHigh = allocateHigh;
+	
+	return err;
+}
+
+/***************************************************************************//**
+	Implementation: Deallocates memory for a branch island.
+	
+	@param	island	->	The island to deallocate.
+	@result			<-	mach_error_t
+
+	***************************************************************************/
+
+	mach_error_t
+freeBranchIsland(
+		BranchIsland	*island )
+{
+	assert( island );
+	assert( (*(long*)&island->instructions[0]) == kIslandTemplate[0] );
+	assert( island->allocatedHigh );
+	
+	mach_error_t	err = err_none;
+	
+	if( island->allocatedHigh ) {
+		vm_size_t pageSize;
+		err = host_page_size( mach_host_self(), &pageSize );
+		if( !err ) {
+			assert( sizeof( BranchIsland ) <= pageSize );
+			err = vm_deallocate(
+					mach_task_self(),
+					(vm_address_t) island, pageSize );
+		}
+	} else {
+		free( island );
+	}
+	
+	return err;
+}
+
+/***************************************************************************//**
+	Implementation: Sets the branch island's target, with an optional
+	instruction.
+	
+	@param	island		->	The branch island to insert target into.
+	@param	branchTo	->	The address of the target.
+	@param	instruction	->	Optional instruction to execute prior to branch. Set
+							to zero for nop.
+	@result				<-	mach_error_t
+
+	***************************************************************************/
+#if defined(__ppc__) || defined(__POWERPC__)
+	mach_error_t
+setBranchIslandTarget(
+		BranchIsland	*island,
+		const void		*branchTo,
+		long			instruction )
+{
+	//	Start from a fresh copy of the template code.
+    bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );
+    
+    //	Fill in the address: the two 16-bit halves of branchTo replace the 0xDEAD / 0xBEEF placeholders.
+    ((short*)island->instructions)[kAddressLo] = ((long) branchTo) & 0x0000FFFF;
+    ((short*)island->instructions)[kAddressHi]
+    	= (((long) branchTo) >> 16) & 0x0000FFFF;
+    
+    //	Fill in the (optional) instruction; zero leaves the template's nop in place.
+    if( instruction != 0 ) {
+        ((short*)island->instructions)[kInstructionLo]
+        	= instruction & 0x0000FFFF;
+        ((short*)island->instructions)[kInstructionHi]
+        	= (instruction >> 16) & 0x0000FFFF;
+    }
+    
+    //	Formerly MakeDataExecutable(); msync(MS_INVALIDATE) is used in its place.
+	msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );
+    
+    return err_none;
+}
+#endif 
+
+#if defined(__i386__)
+	mach_error_t
+setBranchIslandTarget_i386(
+	BranchIsland	*island,
+	const void		*branchTo,
+	char*			instructions )
+{
+
+	//	Copy over the template code.
+    bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );
+
+	// copy original instructions
+	if (instructions) {
+		bcopy (instructions, island->instructions + kInstructions, kOriginalInstructionsSize);
+	}
+	
+    // Fill in the address.
+    int32_t addressOffset = (char *)branchTo - (island->instructions + kJumpAddress + 4);
+    *((int32_t *)(island->instructions + kJumpAddress)) = addressOffset; 
+
+    msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );
+    return err_none;
+}
+
+#elif defined(__x86_64__)
+mach_error_t
+setBranchIslandTarget_i386(
+        BranchIsland	*island,
+        const void		*branchTo,
+        char*			instructions )
+{
+    // Copy over the template code.
+    bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );
+
+    // Copy original instructions.
+    if (instructions) {
+        bcopy (instructions, island->instructions, kOriginalInstructionsSize);
+    }
+
+    //	Fill in the address.
+    *((uint64_t *)(island->instructions + kJumpAddress)) = (uint64_t)branchTo; 
+    msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );
+
+    return err_none;
+}
+#endif
+
+
+#if defined(__i386__) || defined(__x86_64__)
+// simplistic instruction matching
+typedef struct {
+	unsigned int length; // max 15
+	unsigned char mask[15]; // sequence of bytes in memory order
+	unsigned char constraint[15]; // sequence of bytes in memory order
+}	AsmInstructionMatch;
+
+#if defined(__i386__)
+static AsmInstructionMatch possibleInstructions[] = {
+	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} },	// jmp 0x????????
+	{ 0x5, {0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, {0x55, 0x89, 0xe5, 0xc9, 0xc3} },	// push %ebp; mov %esp,%ebp; leave; ret
+	{ 0x1, {0xFF}, {0x90} },							// nop
+	{ 0x1, {0xF8}, {0x50} },							// push %reg
+	{ 0x2, {0xFF, 0xFF}, {0x89, 0xE5} },				                // mov %esp,%ebp
+	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x89, 0x1C, 0x24} },				                // mov %ebx,(%esp)
+	{ 0x3, {0xFF, 0xFF, 0x00}, {0x83, 0xEC, 0x00} },	                        // sub 0x??, %esp
+	{ 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}, {0x81, 0xEC, 0x00, 0x00, 0x00, 0x00} },	// sub 0x??, %esp with 32bit immediate
+	{ 0x2, {0xFF, 0xFF}, {0x31, 0xC0} },						// xor %eax, %eax
+	{ 0x3, {0xFF, 0x4F, 0x00}, {0x8B, 0x45, 0x00} },  // mov $imm(%ebp), %reg
+	{ 0x3, {0xFF, 0x4C, 0x00}, {0x8B, 0x40, 0x00} },  // mov $imm(%eax-%edx), %reg
+	{ 0x3, {0xFF, 0xCF, 0x00}, {0x8B, 0x4D, 0x00} },  // mov $imm(%ebp), %reg
+	{ 0x3, {0xFF, 0x4F, 0x00}, {0x8A, 0x4D, 0x00} },  // mov $imm(%ebp), %cl
+	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x8B, 0x4C, 0x24, 0x00} },  			// mov $imm(%esp), %ecx
+	{ 0x4, {0xFF, 0x00, 0x00, 0x00}, {0x8B, 0x00, 0x00, 0x00} },  			// mov r16,r/m16 or r32,r/m32
+	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xB9, 0x00, 0x00, 0x00, 0x00} }, 	// mov $imm, %ecx
+	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} }, 	// mov $imm, %eax
+	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x66, 0x0F, 0xEF, 0x00} },             	// pxor xmm2/128, xmm1
+	{ 0x2, {0xFF, 0xFF}, {0xDB, 0xE3} }, 						// fninit
+	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE8, 0x00, 0x00, 0x00, 0x00} },	// call $imm
+	{ 0x0 }										// terminator: length 0 ends the table
+};
+#elif defined(__x86_64__)
+// TODO(glider): disassembling the "0x48, 0x89" sequences is trickier than it's done below.
+// If it stops working, refer to http://ref.x86asm.net/geek.html#modrm_byte_32_64 to do it
+// more accurately.
+// Note: 0x48 is in fact the REX.W prefix, but it might be wrong to treat it as a separate
+// instruction.
+static AsmInstructionMatch possibleInstructions[] = {
+	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} },	// jmp 0x????????
+	{ 0x1, {0xFF}, {0x90} },							// nop
+	{ 0x1, {0xF8}, {0x50} },							// push %rX
+	{ 0x1, {0xFF}, {0x65} },							// GS prefix
+	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x48, 0x89, 0xE5} },				// mov %rsp,%rbp
+	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x48, 0x83, 0xEC, 0x00} },	                // sub 0x??, %rsp
+	{ 0x4, {0xFB, 0xFF, 0x07, 0x00}, {0x48, 0x89, 0x05, 0x00} },	                // move onto rbp
+	{ 0x3, {0xFB, 0xFF, 0x00}, {0x48, 0x89, 0x00} },	                            // mov %reg, %reg
+	{ 0x3, {0xFB, 0xFF, 0x00}, {0x49, 0x89, 0x00} },	                            // mov %reg, %reg (REX.WB)
+	{ 0x2, {0xFF, 0x00}, {0x41, 0x00} },						// push %rXX
+	{ 0x2, {0xFF, 0x00}, {0x85, 0x00} },						// test %rX,%rX
+	{ 0x2, {0xFF, 0x00}, {0x77, 0x00} },						// ja $i8
+	{ 0x2, {0xFF, 0x00}, {0x74, 0x00} },						// je $i8
+	{ 0x5, {0xF8, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} },	// mov $imm, %reg
+	{ 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} },				// pushq $imm(%rdi)
+	{ 0x2, {0xFF, 0xFF}, {0x31, 0xC0} },						// xor %eax, %eax
+	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0x25, 0x00, 0x00, 0x00, 0x00} },	// and $imm, %eax
+	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x80, 0x3F, 0x00} },				// cmpb $imm, (%rdi)
+
+  { 0x8, {0xFF, 0xFF, 0xCF, 0xFF, 0x00, 0x00, 0x00, 0x00},
+         {0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00}, },                     // mov $imm, %{rax,rdx,rsp,rsi}
+  { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x48, 0x83, 0xFA, 0x00}, },   // cmp $i8, %rdx
+	{ 0x4, {0xFF, 0xFF, 0x00, 0x00}, {0x83, 0x7f, 0x00, 0x00}, },			// cmpl $imm, $imm(%rdi)
+	{ 0xa, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+               {0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} },    // mov $imm, %rax
+        { 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00},
+               {0x81, 0xE6, 0x00, 0x00, 0x00, 0x00} },                            // and $imm, %esi
+        { 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00},
+               {0xFF, 0x25, 0x00, 0x00, 0x00, 0x00} },                            // jmpq *(%rip)
+        { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x66, 0x0F, 0xEF, 0x00} },              // pxor xmm2/128, xmm1
+        { 0x2, {0xFF, 0x00}, {0x89, 0x00} },                               // mov r/m32,r32 or r/m16,r16
+        { 0x3, {0xFF, 0xFF, 0xFF}, {0x49, 0x89, 0xF8} },                   // mov %rdi,%r8        
+        { 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} },  // pushq $imm(%rdi)
+        { 0x2, {0xFF, 0xFF}, {0xDB, 0xE3} }, // fninit
+	{ 0x0 }
+};
+#endif
+
+static Boolean codeMatchesInstruction(unsigned char *code, AsmInstructionMatch* instruction) 
+{
+	// Tests the bytes at |code| against one pattern: byte i matches when
+	// (code[i] & mask[i]) == constraint[i]. Comparison stops at the first
+	// mismatch; a pattern of length 0 matches trivially.
+	Boolean matched = true;
+	size_t pos = 0;
+
+  assert(instruction);
+#ifdef DEBUG_DISASM
+	fprintf(stderr, "Matching: ");
+#endif  
+	while (matched && pos < instruction->length) {
+		unsigned char actual = code[pos];
+#ifdef DEBUG_DISASM
+		fprintf(stderr, "%x ", (unsigned)actual);
+#endif    
+		unsigned char wanted = instruction->constraint[pos];
+		unsigned char relevant = actual & instruction->mask[pos];
+		matched = (relevant == wanted);
+		pos += 1;
+	}
+
+#ifdef DEBUG_DISASM
+	fprintf(stderr, matched ? " OK\n" : " FAIL\n");
+#endif  
+	return matched;
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+	static Boolean 
+eatKnownInstructions( 
+	unsigned char	*code, 
+	uint64_t		*newInstruction,
+	int				*howManyEaten, 
+	char			*originalInstructions,
+	int				*originalInstructionCount, 
+	uint8_t			*originalInstructionSizes )
+{
+	Boolean allInstructionsKnown = true;
+	int totalEaten = 0;
+	unsigned char* ptr = code;
+	int remainsToEat = 5; // a JMP instruction takes 5 bytes
+	int instructionIndex = 0;
+	// Consume whole known instructions at |code| until the 5 bytes for the JMP are covered.
+	if (howManyEaten) *howManyEaten = 0;
+	if (originalInstructionCount) *originalInstructionCount = 0;
+	while (remainsToEat > 0) {
+		Boolean curInstructionKnown = false;
+		
+		// See if the instruction matches one we know
+		AsmInstructionMatch* curInstr = possibleInstructions;
+		do { 
+			if ((curInstructionKnown = codeMatchesInstruction(ptr, curInstr))) break;
+			curInstr++;
+		} while (curInstr->length > 0);
+		
+		// if every pattern failed, we don't know the current instruction; stop here
+		if (!curInstructionKnown) { 
+			allInstructionsKnown = false;
+			fprintf(stderr, "mach_override: some instructions unknown! Need to update mach_override.c\n");
+			break;
+		}
+		
+		// At this point, we've matched curInstr
+		int eaten = curInstr->length;
+		ptr += eaten;
+		remainsToEat -= eaten;
+		totalEaten += eaten;
+		
+		if (originalInstructionSizes) originalInstructionSizes[instructionIndex] = eaten;
+		instructionIndex += 1;
+		if (originalInstructionCount) *originalInstructionCount = instructionIndex;
+	}
+
+	// Report the number of bytes consumed, then hand back a NOP-padded copy of them.
+	if (howManyEaten) *howManyEaten = totalEaten;
+
+	if (originalInstructions) {
+		Boolean enoughSpaceForOriginalInstructions = (totalEaten < kOriginalInstructionsSize);
+		
+		if (enoughSpaceForOriginalInstructions) {
+			memset(originalInstructions, 0x90 /* NOP */, kOriginalInstructionsSize); // fill instructions with NOP
+			bcopy(code, originalInstructions, totalEaten);
+		} else {
+#ifdef DEBUG_DISASM
+			fprintf(stderr, "Not enough space in island to store original instructions. Adapt the island definition and kOriginalInstructionsSize\n");
+#endif      
+			return false;
+		}
+	}
+	
+	if (allInstructionsKnown) {
+		// save the last 3 bytes of the first 64 bits of code we'll replace
+		uint64_t currentFirst64BitsOfCode = *((uint64_t *)code);
+		currentFirst64BitsOfCode = OSSwapInt64(currentFirst64BitsOfCode); // back to memory representation
+		currentFirst64BitsOfCode &= 0x0000000000FFFFFFLL; 
+		
+		// keep only the last 3 instruction bytes; the first 5 will be replaced by the JMP instr
+		*newInstruction &= 0xFFFFFFFFFF000000LL; // clear last 3 bytes
+		*newInstruction |= (currentFirst64BitsOfCode & 0x0000000000FFFFFFLL); // set last 3 bytes
+	}
+
+	return allInstructionsKnown;
+}
+
+	static void
+fixupInstructions(
+    void		*originalFunction,
+    void		*escapeIsland,
+    void		*instructionsToFix,
+	int			instructionCount,
+	uint8_t		*instructionSizes )
+{
+	// Relocates the relative branches in |instructionsToFix| (a copy of the first
+	// instructions of |originalFunction|) so they work when run at |escapeIsland|.
+	uintptr_t source = (uintptr_t)originalFunction;	// original of the current instruction
+	uint8_t *copyStart = (uint8_t*)instructionsToFix;
+	uint8_t *cursor = copyStart;	// current instruction inside the copy
+	int	index, fixed_size, code_size = 0;	// code_size: original bytes not yet visited
+	for (index = 0;index < instructionCount;index += 1)
+		code_size += instructionSizes[index];
+#ifdef DEBUG_DISASM
+	void *initialOriginalFunction = originalFunction;
+	void *initialInstructionsToFix = instructionsToFix;
+	fprintf(stderr, "BEFORE FIXING:\n");
+	dump16Bytes(initialOriginalFunction);
+	dump16Bytes(initialInstructionsToFix);
+#endif  // DEBUG_DISASM
+
+	for (index = 0;index < instructionCount;index += 1)
+	{
+		uintptr_t addressInIsland = (uintptr_t)escapeIsland + (uintptr_t)(cursor - copyStart);
+		uint32_t displacement;
+		fixed_size = instructionSizes[index];
+		code_size -= instructionSizes[index];
+		if ((cursor[0] == 0xE9) || // 32-bit jump relative
+		    (cursor[0] == 0xE8))   // 32-bit call relative
+		{
+			bcopy(cursor + 1, &displacement, sizeof(displacement));
+			displacement += (uint32_t)(source - addressInIsland);	// same length, new base
+			bcopy(&displacement, cursor + 1, sizeof(displacement));
+		}
+		else if ((cursor[0] == 0x74) ||  // Near jump if equal (je), 2 bytes.
+		         (cursor[0] == 0x77))    // Near jump if above (ja), 2 bytes.
+		{
+			// Widen "7P JJ" to "0F 8P WW XX YY ZZ"; a rel8 cannot reach back from the
+			// island. JJ is signed and counts from the end of the 2-byte original.
+			uintptr_t target = source + 2 + (uintptr_t)(intptr_t)(int8_t)cursor[1];
+			displacement = (uint32_t)(target - (addressInIsland + 6));
+			cursor[1] = cursor[0] + 0x10;
+			cursor[0] = 0x0F;
+			bcopy(&displacement, cursor + 2, sizeof(displacement));
+			fixed_size = 6;
+		}
+		source += instructionSizes[index];
+		cursor += fixed_size;
+		// Widening overwrote the instructions that follow in the copy; restore them.
+		if ((code_size > 0) && (fixed_size != instructionSizes[index]))
+			bcopy((void*)source, cursor, code_size);
+	}
+#ifdef DEBUG_DISASM
+	fprintf(stderr, "AFTER_FIXING:\n");
+	dump16Bytes(initialOriginalFunction);
+	dump16Bytes(initialInstructionsToFix);
+#endif  // DEBUG_DISASM
+}
+
+#ifdef DEBUG_DISASM
+#define HEX_DIGIT(x) ((((x) % 16) < 10) ? ('0' + ((x) % 16)) : ('A' + ((x) % 16 - 10)))
+
+	static void
+dump16Bytes(
+	void 	*ptr) {
+	int i;
+	char buf[3];
+	uint8_t *bytes = (uint8_t*)ptr;
+	for (i = 0; i < 16; i++) {
+		buf[0] = HEX_DIGIT(bytes[i] / 16);
+		buf[1] = HEX_DIGIT(bytes[i] % 16);
+		buf[2] = ' ';
+		write(2, buf, 3);
+	}
+	write(2, "\n", 1);
+}
+#endif  // DEBUG_DISASM
+#endif
+
+#if defined(__i386__)
+__asm(
+			".text;"
+			".align 2, 0x90;"
+			"_atomic_mov64:;"
+			"	pushl %ebp;"
+			"	movl %esp, %ebp;"
+			"	pushl %esi;"
+			"	pushl %ebx;"
+			"	pushl %ecx;"
+			"	pushl %eax;"
+			"	pushl %edx;"
+	
+			// atomic push of value to an address
+			// we use cmpxchg8b, which compares content of an address with 
+			// edx:eax. If they are equal, it atomically puts 64bit value 
+			// ecx:ebx in address. 
+			// We thus put contents of address in edx:eax to force ecx:ebx
+			// in address
+			"	mov		8(%ebp), %esi;"  // esi contains target address
+			"	mov		12(%ebp), %ebx;"
+			"	mov		16(%ebp), %ecx;" // ecx:ebx now contains value to put in target address
+			"	mov		(%esi), %eax;"
+			"	mov		4(%esi), %edx;"  // edx:eax now contains value currently contained in target address
+			"	lock; cmpxchg8b	(%esi);" // atomic move.
+			
+			// restore registers
+			"	popl %edx;"
+			"	popl %eax;"
+			"	popl %ecx;"
+			"	popl %ebx;"
+			"	popl %esi;"
+			"	popl %ebp;"
+			"	ret"
+);
+#elif defined(__x86_64__)
+void atomic_mov64(
+		uint64_t *targetAddress,
+		uint64_t value )
+{
+    *targetAddress = value;	// plain 64-bit store; NOTE(review): presumably atomic only for a suitably aligned target - confirm
+}
+#endif
+#endif
+#endif  // __APPLE__
diff --git a/lib/asan/interception/mach_override/mach_override.h b/lib/asan/interception/mach_override/mach_override.h
new file mode 100644
index 0000000..7e60cdc
--- /dev/null
+++ b/lib/asan/interception/mach_override/mach_override.h
@@ -0,0 +1,140 @@
+/*******************************************************************************
+	mach_override.h
+		Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com>
+		Some rights reserved: <http://opensource.org/licenses/mit-license.php>
+
+	***************************************************************************/
+
+/***************************************************************************//**
+	@mainpage	mach_override
+	@author		Jonathan 'Wolf' Rentzsch: <http://rentzsch.com>
+	
+	This package, coded in C to the Mach API, allows you to override ("patch")
+	program- and system-supplied functions at runtime. You can fully replace
+	functions with your implementations, or merely head- or tail-patch the
+	original implementations.
+	
+	Use it by #include'ing mach_override.h from your .c, .m or .mm file(s).
+	
+	@todo	Discontinue use of Carbon's MakeDataExecutable() and
+			CompareAndSwap() calls and start using the Mach equivalents, if they
+			exist. If they don't, write them and roll them in. That way, this
+			code will be pure Mach, which will make it easier to use everywhere.
+			Update: MakeDataExecutable() has been replaced by
+			msync(MS_INVALIDATE). There is an OSCompareAndSwap in libkern, but
+			I'm currently unsure if I can link against it. May have to roll in
+			my own version...
+	@todo	Stop using an entire 4K high-allocated VM page per 28-byte escape
+			branch island. Done right, this will dramatically speed up escape
+			island allocations when they number over 250. Then again, if you're
+			overriding more than 250 functions, maybe speed isn't your main
+			concern...
+	@todo	Add detection of: b, bl, bla, bc, bcl, bcla, bcctrl, bclrl
+			first-instructions. Initially, we should refuse to override
+			functions beginning with these instructions. Eventually, we should
+			dynamically rewrite them to make them position-independent.
+	@todo	Write mach_unoverride(), which would remove an override placed on a
+			function. Must be multiple-override aware, which means an almost
+			complete rewrite under the covers, because the target address can't
+			be spread across two load instructions like it is now since it will
+			need to be atomically updatable.
+	@todo	Add non-reentry variants of overrides to test_mach_override.
+
+	***************************************************************************/
+
+#ifdef __APPLE__
+
+#ifndef		_mach_override_
+#define		_mach_override_
+
+#include <sys/types.h>
+#include <mach/error.h>
+
+#ifdef	__cplusplus
+	extern	"C"	{
+#endif
+
+/**
+	Returned if the function to be overridden begins with a 'mfctr' instruction.
+*/
+#define	err_cannot_override	(err_local|1)
+
+/************************************************************************************//**
+	Dynamically overrides the function implementation referenced by
+	originalFunctionAddress with the implementation pointed to by overrideFunctionAddress.
+	Optionally returns a pointer to a "reentry island" which, if jumped to, will resume
+	the original implementation.
+	
+	@param	originalFunctionAddress			->	Required address of the function to
+												override (with overrideFunctionAddress).
+	@param	overrideFunctionAddress			->	Required address to the overriding
+												function.
+	@param	originalFunctionReentryIsland	<-	Optional pointer to pointer to the
+												reentry island. Can be NULL.
+	@result									<-	err_cannot_override if the original
+												function's implementation begins with
+												the 'mfctr' instruction.
+
+	************************************************************************************/
+
+// We're prefixing mach_override_ptr() with "__asan_" to avoid name conflicts with other
+// mach_override_ptr() implementations that may appear in the client program.
+    mach_error_t
+__asan_mach_override_ptr(
+	void *originalFunctionAddress,
+    const void *overrideFunctionAddress,
+    void **originalFunctionReentryIsland );
+
+// Allow the use of custom allocation and deallocation routines with mach_override_ptr().
+// This should help to speed things up on x86_64.
+typedef mach_error_t island_malloc( void **ptr, size_t size, void *hint );
+typedef mach_error_t island_free( void *ptr );
+
+    mach_error_t
+__asan_mach_override_ptr_custom(
+	void *originalFunctionAddress,
+    const void *overrideFunctionAddress,
+    void **originalFunctionReentryIsland,
+    island_malloc *alloc,
+    island_free *dealloc );
+
+/************************************************************************************//**
+	C++ convenience macros: MACH_OVERRIDE(...) opens the body of a replacement function;
+	END_MACH_OVERRIDE(name) closes it and installs it, assigning the result to 'err'.
+	************************************************************************************/
+ 
+#ifdef	__cplusplus
+
+#define MACH_OVERRIDE( ORIGINAL_FUNCTION_RETURN_TYPE, ORIGINAL_FUNCTION_NAME, ORIGINAL_FUNCTION_ARGS, ERR )			\
+	{																												\
+		static ORIGINAL_FUNCTION_RETURN_TYPE (*ORIGINAL_FUNCTION_NAME##_reenter)ORIGINAL_FUNCTION_ARGS;				\
+		static bool ORIGINAL_FUNCTION_NAME##_overriden = false;														\
+		class mach_override_class__##ORIGINAL_FUNCTION_NAME {														\
+		public:																										\
+			static kern_return_t override(void *originalFunctionPtr) {												\
+				kern_return_t result = err_none;																	\
+				if (!ORIGINAL_FUNCTION_NAME##_overriden) {															\
+					ORIGINAL_FUNCTION_NAME##_overriden = true;														\
+					result = __asan_mach_override_ptr( (void*)originalFunctionPtr,									\
+												(void*)mach_override_class__##ORIGINAL_FUNCTION_NAME::replacement,	\
+												(void**)&ORIGINAL_FUNCTION_NAME##_reenter );						\
+				}																									\
+				return result;																						\
+			}																										\
+			static ORIGINAL_FUNCTION_RETURN_TYPE replacement ORIGINAL_FUNCTION_ARGS {
+
+#define END_MACH_OVERRIDE( ORIGINAL_FUNCTION_NAME )																	\
+			}																										\
+		};																											\
+																													\
+		err = mach_override_class__##ORIGINAL_FUNCTION_NAME::override((void*)ORIGINAL_FUNCTION_NAME);				\
+	}
+ 
+#endif
+
+#ifdef	__cplusplus
+	}
+#endif
+#endif	//	_mach_override_
+
+#endif  // __APPLE__
diff --git a/lib/asan/output_tests/clone_test.cc b/lib/asan/output_tests/clone_test.cc
new file mode 100644
index 0000000..b18d255
--- /dev/null
+++ b/lib/asan/output_tests/clone_test.cc
@@ -0,0 +1,34 @@
+#ifdef __linux__
+#include <stdio.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+int Child(void *arg) {
+  char x[32] = {0};  // Stack gets poisoned.
+  printf("Child:  %p\n", x);
+  _exit(1);  // NoReturn, stack will remain unpoisoned unless we do something.
+}
+
+int main(int argc, char **argv) {
+  const int kStackSize = 1 << 20;
+  char child_stack[kStackSize + 1];
+  char *sp = child_stack + kStackSize;  // Stack grows down.
+  printf("Parent: %p\n", sp);
+  pid_t clone_pid = clone(Child, sp, CLONE_FILES | CLONE_VM, NULL, 0, 0, 0);
+  // No SIGCHLD in the clone flags, so this is a "clone" child: needs __WCLONE.
+  waitpid(clone_pid, NULL, __WCLONE);
+  for (int i = 0; i < kStackSize; i++) child_stack[i] = i;  // Reuse its stack.
+  int ret = child_stack[argc - 1];
+  printf("PASSED\n");
+  return ret;
+}
+#else  // not __linux__
+#include <stdio.h>
+int main() {
+  printf("PASSED\n");
+  // Check-Common: PASSED
+}
+#endif
diff --git a/lib/asan/output_tests/deep_tail_call.cc b/lib/asan/output_tests/deep_tail_call.cc
new file mode 100644
index 0000000..cb69e89
--- /dev/null
+++ b/lib/asan/output_tests/deep_tail_call.cc
@@ -0,0 +1,15 @@
+// Check-Common: AddressSanitizer global-buffer-overflow
+int global[10];
+// Check-Common: {{#0.*call4}}
+void __attribute__((noinline)) call4(int i) { global[i+10]++; }
+// Check-Common: {{#1.*call3}}
+void __attribute__((noinline)) call3(int i) { call4(i); }
+// Check-Common: {{#2.*call2}}
+void __attribute__((noinline)) call2(int i) { call3(i); }
+// Check-Common: {{#3.*call1}}
+void __attribute__((noinline)) call1(int i) { call2(i); }
+// Check-Common: {{#4.*main}}
+int main(int argc, char **argv) {
+  call1(argc);
+  return global[0];
+}
diff --git a/lib/asan/output_tests/dlclose-test-so.cc b/lib/asan/output_tests/dlclose-test-so.cc
new file mode 100644
index 0000000..73e0050
--- /dev/null
+++ b/lib/asan/output_tests/dlclose-test-so.cc
@@ -0,0 +1,33 @@
+//===----------- dlclose-test-so.cc -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Regression test for
+// http://code.google.com/p/address-sanitizer/issues/detail?id=19
+//===----------------------------------------------------------------------===//
+#include <stdio.h>
+
+static int pad1;
+static int static_var;
+static int pad2;
+
+extern "C"
+int *get_address_of_static_var() {
+  return &static_var;
+}
+
+__attribute__((constructor))
+void at_dlopen() {
+  printf("%s: I am being dlopened\n", __FILE__);
+}
+__attribute__((destructor))
+void at_dlclose() {
+  printf("%s: I am being dlclosed\n", __FILE__);
+}
diff --git a/lib/asan/output_tests/dlclose-test.cc b/lib/asan/output_tests/dlclose-test.cc
new file mode 100644
index 0000000..16126eb
--- /dev/null
+++ b/lib/asan/output_tests/dlclose-test.cc
@@ -0,0 +1,74 @@
+//===----------- dlclose-test.cc --------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Regression test for
+// http://code.google.com/p/address-sanitizer/issues/detail?id=19
+// Bug description:
+// 1. application dlopens foo.so
+// 2. asan registers all globals from foo.so
+// 3. application dlcloses foo.so
+// 4. application mmaps some memory to the location where foo.so was before
+// 5. application starts using this mmaped memory, but asan still thinks there
+// are globals.
+// 6. BOOM
+//===----------------------------------------------------------------------===//
+#include <assert.h>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <string>
+
+using std::string;
+
+static const int kPageSize = 4096;
+
+typedef int *(fun_t)();
+
+int main(int argc, char *argv[]) {
+  string path = string(argv[0]) + "-so.so";
+  printf("opening %s ... \n", path.c_str());
+  void *lib = dlopen(path.c_str(), RTLD_NOW);
+  if (!lib) {
+    printf("error in dlopen(): %s\n", dlerror());
+    return 1;
+  }
+  fun_t *get = (fun_t*)dlsym(lib, "get_address_of_static_var");
+  if (!get) {
+    printf("failed dlsym\n");
+    return 1;
+  }
+  int *addr = get();
+  assert(((size_t)addr % 32) == 0);  // should be 32-byte aligned.
+  printf("addr: %p\n", addr);
+  addr[0] = 1;  // make sure we can write there.
+
+  // Now dlclose the shared library.
+  printf("attempting to dlclose\n");
+  if (dlclose(lib)) {
+    printf("failed to dlclose\n");
+    return 1;
+  }
+  // Now, the page where 'addr' was is unmapped. Map it anonymously (fd = -1).
+  size_t page_beg = ((size_t)addr) & ~(kPageSize - 1);
+  void *res = mmap((void*)(page_beg), kPageSize,
+                   PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE, -1, 0);
+  if (res == MAP_FAILED) {
+    printf("failed to mmap\n");
+    return 1;
+  }
+  addr[1] = 2;  // BOOM (if the bug is not fixed).
+  printf("PASS\n");
+  // Check-Common: PASS
+  return 0;
+}
diff --git a/lib/asan/output_tests/global-overflow.cc b/lib/asan/output_tests/global-overflow.cc
new file mode 100644
index 0000000..a63eb73
--- /dev/null
+++ b/lib/asan/output_tests/global-overflow.cc
@@ -0,0 +1,16 @@
+#include <string.h>
+int main(int argc, char **argv) {
+  static char XXX[10];
+  static char YYY[10];
+  static char ZZZ[10];
+  memset(XXX, 0, 10);
+  memset(YYY, 0, 10);
+  memset(ZZZ, 0, 10);
+  int res = YYY[argc * 10];  // BOOOM
+  // Check-Common: {{READ of size 1 at 0x.* thread T0}}
+  // Check-Common: {{    #0 0x.* in main .*global-overflow.cc:9}}
+  // Check-Common: {{0x.* is located 0 bytes to the right of global variable}}
+  // Check-Common:   {{.*YYY.* of size 10}}
+  res += XXX[argc] + ZZZ[argc];
+  return res;
+}
diff --git a/lib/asan/output_tests/heap-overflow.cc b/lib/asan/output_tests/heap-overflow.cc
new file mode 100644
index 0000000..a8656c6
--- /dev/null
+++ b/lib/asan/output_tests/heap-overflow.cc
@@ -0,0 +1,22 @@
+#include <stdlib.h>
+#include <string.h>
+int main(int argc, char **argv) {
+  char *x = (char*)malloc(10 * sizeof(char));
+  memset(x, 0, 10);
+  int res = x[argc * 10];  // BOOOM
+  free(x);
+  return res;
+}
+
+// Check-Common: {{READ of size 1 at 0x.* thread T0}}
+// Check-Common: {{    #0 0x.* in main .*heap-overflow.cc:6}}
+// Check-Common: {{0x.* is located 0 bytes to the right of 10-byte region}}
+// Check-Common: {{allocated by thread T0 here:}}
+
+// Check-Linux: {{    #0 0x.* in malloc}}
+// Check-Linux: {{    #1 0x.* in main .*heap-overflow.cc:[45]}}
+
+// Check-Darwin: {{    #0 0x.* in .*mz_malloc.*}}
+// Check-Darwin: {{    #1 0x.* in malloc_zone_malloc.*}}
+// Check-Darwin: {{    #2 0x.* in malloc.*}}
+// Check-Darwin: {{    #3 0x.* in main .*heap-overflow.cc:[45]}}
diff --git a/lib/asan/output_tests/large_func_test.cc b/lib/asan/output_tests/large_func_test.cc
new file mode 100644
index 0000000..2980d0b
--- /dev/null
+++ b/lib/asan/output_tests/large_func_test.cc
@@ -0,0 +1,43 @@
+#include <stdlib.h>
+__attribute__((noinline))
+static void LargeFunction(int *x, int zero) {
+  x[0]++;
+  x[1]++;
+  x[2]++;
+  x[3]++;
+  x[4]++;
+  x[5]++;
+  x[6]++;
+  x[7]++;
+  x[8]++;
+  x[9]++;
+
+  x[zero + 111]++;  // we should report this exact line
+
+  x[10]++;
+  x[11]++;
+  x[12]++;
+  x[13]++;
+  x[14]++;
+  x[15]++;
+  x[16]++;
+  x[17]++;
+  x[18]++;
+  x[19]++;
+}
+
+int main(int argc, char **argv) {
+  int *x = new int[100];
+  LargeFunction(x, argc - 1);  // with argc == 1 this touches x[111]: the overflow.
+  delete [] x;  // array form, to match new[]
+}
+
+// Check-Common: {{.*ERROR: AddressSanitizer heap-buffer-overflow on address}}
+// Check-Common:   {{0x.* at pc 0x.* bp 0x.* sp 0x.*}}
+// Check-Common: {{READ of size 4 at 0x.* thread T0}}
+// Check-Common: {{    #0 0x.* in LargeFunction.*large_func_test.cc:15}}
+// Check-Common: {{    #1 0x.* in main .*large_func_test.cc:3[012]}}
+// Check-Common: {{0x.* is located 44 bytes to the right of 400-byte region}}
+// Check-Common: {{allocated by thread T0 here:}}
+// Check-Common: {{    #0 0x.* in operator new.*}}
+// Check-Common: {{    #1 0x.* in main .*large_func_test.cc:30}}
diff --git a/lib/asan/output_tests/memcmp_test.cc b/lib/asan/output_tests/memcmp_test.cc
new file mode 100644
index 0000000..d0e5a43
--- /dev/null
+++ b/lib/asan/output_tests/memcmp_test.cc
@@ -0,0 +1,10 @@
+#include <string.h>
+int main(int argc, char **argv) {
+  char a1[] = {static_cast<char>(argc), 2, 3, 4};  // explicit: no narrowing
+  char a2[] = {1, static_cast<char>(2*argc), 3, 4};
+// Check-Common: AddressSanitizer stack-buffer-overflow
+// Check-Common: {{#0.*memcmp}}
+// Check-Common: {{#1.*main}}
+  int res = memcmp(a1, a2, 4 + argc);  // BOOM
+  return res;
+}
diff --git a/lib/asan/output_tests/null_deref.cc b/lib/asan/output_tests/null_deref.cc
new file mode 100644
index 0000000..e0fac94
--- /dev/null
+++ b/lib/asan/output_tests/null_deref.cc
@@ -0,0 +1,17 @@
+__attribute__((noinline))
+static void NullDeref(int *ptr) {
+  ptr[10]++;
+}
+int main() {
+  NullDeref((int*)0);
+}
+
+// Check-Common: {{.*ERROR: AddressSanitizer crashed on unknown address}}
+// Check-Common:   {{0x0*00028 .*pc 0x.*}}
+// Check-Common: {{AddressSanitizer can not provide additional info. ABORTING}}
+
+// atos on Mac cannot resolve the file:line info for frame 0 on the O1 level
+// Check-Linux: {{    #0 0x.* in NullDeref.*null_deref.cc:3}}
+// Check-Darwin: {{    #0 0x.* in NullDeref.*}}
+
+// Check-Common: {{    #1 0x.* in main.*null_deref.cc:[67]}}
diff --git a/lib/asan/output_tests/shared-lib-test-so.cc b/lib/asan/output_tests/shared-lib-test-so.cc
new file mode 100644
index 0000000..686a245
--- /dev/null
+++ b/lib/asan/output_tests/shared-lib-test-so.cc
@@ -0,0 +1,21 @@
+//===----------- shared-lib-test-so.cc --------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+#include <stdio.h>
+
+int pad[10];
+int GLOB[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+extern "C"
+void inc(int index) {
+  GLOB[index]++;
+}
diff --git a/lib/asan/output_tests/shared-lib-test.cc b/lib/asan/output_tests/shared-lib-test.cc
new file mode 100644
index 0000000..bf47068
--- /dev/null
+++ b/lib/asan/output_tests/shared-lib-test.cc
@@ -0,0 +1,42 @@
+//===----------- shared-lib-test.cc -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+#include <dlfcn.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+
+using std::string;
+
+typedef void (fun_t)(int x);
+
+int main(int argc, char *argv[]) {
+  string path = string(argv[0]) + "-so.so";
+  printf("opening %s ... \n", path.c_str());
+  void *lib = dlopen(path.c_str(), RTLD_NOW);
+  if (!lib) {
+    printf("error in dlopen(): %s\n", dlerror());
+    return 1;
+  }
+  fun_t *inc = (fun_t*)dlsym(lib, "inc");
+  if (!inc) return 1;
+  printf("ok\n");
+  inc(1);
+  inc(-1);  // BOOM
+  return 0;
+}
+
+// Check-Common: {{.*ERROR: AddressSanitizer global-buffer-overflow}}
+// Check-Common: {{READ of size 4 at 0x.* thread T0}}
+// Check-Common: {{    #0 0x.*}}
+// Check-Common: {{    #1 0x.* in main .*shared-lib-test.cc:3[567]}}
diff --git a/lib/asan/output_tests/stack-overflow.cc b/lib/asan/output_tests/stack-overflow.cc
new file mode 100644
index 0000000..35fa8a6
--- /dev/null
+++ b/lib/asan/output_tests/stack-overflow.cc
@@ -0,0 +1,11 @@
+#include <string.h>
+int main(int argc, char **argv) {
+  char x[10];
+  memset(x, 0, 10);
+  int res = x[argc * 10];  // BOOOM
+  return res;
+}
+
+// Check-Common: {{READ of size 1 at 0x.* thread T0}}
+// Check-Common: {{    #0 0x.* in main .*stack-overflow.cc:5}}
+// Check-Common: {{Address 0x.* is .* frame <main>}}
diff --git a/lib/asan/output_tests/stack-use-after-return.cc.disabled b/lib/asan/output_tests/stack-use-after-return.cc.disabled
new file mode 100644
index 0000000..f497157
--- /dev/null
+++ b/lib/asan/output_tests/stack-use-after-return.cc.disabled
@@ -0,0 +1,27 @@
+#include <stdio.h>
+
+__attribute__((noinline))
+char *Ident(char *x) {
+  fprintf(stderr, "1: %p\n", x);
+  return x;
+}
+
+__attribute__((noinline))
+char *Func1() {
+  char local;
+  return Ident(&local);
+}
+
+__attribute__((noinline))
+void Func2(char *x) {
+  fprintf(stderr, "2: %p\n", x);
+  *x = 1;
+  // Check-Common: {{WRITE of size 1 .* thread T0}}
+  // Check-Common: {{    #0.*Func2.*stack-use-after-return.cc:18}}
+  // Check-Common: {{is located in frame <.*Func1.*> of T0's stack}}
+}
+
+int main(int argc, char **argv) {
+  Func2(Func1());
+  return 0;
+}
diff --git a/lib/asan/output_tests/strncpy-overflow.cc b/lib/asan/output_tests/strncpy-overflow.cc
new file mode 100644
index 0000000..2346188
--- /dev/null
+++ b/lib/asan/output_tests/strncpy-overflow.cc
@@ -0,0 +1,24 @@
+#include <string.h>
+#include <stdlib.h>
+int main(int argc, char **argv) {
+  char *hello = (char*)malloc(6);
+  strcpy(hello, "hello");
+  char *short_buffer = (char*)malloc(9);
+  strncpy(short_buffer, hello, 10);  // BOOM
+  return short_buffer[8];
+}
+
+// Check-Common: {{WRITE of size 1 at 0x.* thread T0}}
+// Check-Linux: {{    #0 0x.* in strncpy}}
+// Check-Darwin: {{    #0 0x.* in wrap_strncpy}}
+// Check-Common: {{    #1 0x.* in main .*strncpy-overflow.cc:[78]}}
+// Check-Common: {{0x.* is located 0 bytes to the right of 9-byte region}}
+// Check-Common: {{allocated by thread T0 here:}}
+
+// Check-Linux: {{    #0 0x.* in malloc}}
+// Check-Linux: {{    #1 0x.* in main .*strncpy-overflow.cc:6}}
+
+// Check-Darwin: {{    #0 0x.* in .*mz_malloc.*}}
+// Check-Darwin: {{    #1 0x.* in malloc_zone_malloc.*}}
+// Check-Darwin: {{    #2 0x.* in malloc.*}}
+// Check-Darwin: {{    #3 0x.* in main .*strncpy-overflow.cc:6}}
diff --git a/lib/asan/output_tests/test_output.sh b/lib/asan/output_tests/test_output.sh
new file mode 100755
index 0000000..ca8beb5
--- /dev/null
+++ b/lib/asan/output_tests/test_output.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+set -e # fail on any error
+
+OS=`uname`
+CXX=$1
+CC=$2
+FILE_CHECK=$3
+CXXFLAGS="-mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -g"
+SYMBOLIZER=../scripts/asan_symbolize.py
+TMP_ASAN_REPORT=asan_report.tmp
+
+run_program() {
+  ./$1 2>&1 | $SYMBOLIZER 2> /dev/null | c++filt > $TMP_ASAN_REPORT
+}
+
+# check_program exe_file source_file check_prefix
+check_program() {
+  run_program $1
+  $FILE_CHECK $2 --check-prefix=$3 < $TMP_ASAN_REPORT
+  rm -f $TMP_ASAN_REPORT
+}
+
+C_TEST=use-after-free
+echo "Sanity checking a test in pure C"
+$CC -g -faddress-sanitizer -O2 $C_TEST.c
+check_program a.out $C_TEST.c CHECK
+rm ./a.out
+
+echo "Sanity checking a test in pure C with -pie"
+$CC -g -faddress-sanitizer -O2 $C_TEST.c -pie
+check_program a.out $C_TEST.c CHECK
+rm ./a.out
+
+echo "Testing sleep_before_dying"
+$CC -g -faddress-sanitizer -O2 $C_TEST.c
+export ASAN_OPTIONS="sleep_before_dying=1"
+check_program a.out $C_TEST.c CHECKSLEEP
+export ASAN_OPTIONS=""
+rm ./a.out
+
+for t in  *.cc; do
+  for b in 32 64; do
+    for O in 0 1 2 3; do
+      c=`basename $t .cc`
+      if [[ "$c" == *"-so" ]]
+      then
+        continue
+      fi
+      c_so=$c-so
+      exe=$c.$b.O$O
+      so=$c.$b.O$O-so.so
+      echo testing $exe
+      build_command="$CXX $CXXFLAGS -m$b -faddress-sanitizer -O$O $c.cc -o $exe"
+      [ "$DEBUG" == "1" ] && echo $build_command
+      $build_command
+      [ -e "$c_so.cc" ] && $CXX $CXXFLAGS -m$b -faddress-sanitizer -O$O $c_so.cc -fPIC -shared -o $so
+      run_program $exe
+      # Check common expected lines for OS.
+      $FILE_CHECK $c.cc --check-prefix="Check-Common" < $TMP_ASAN_REPORT
+      # Check OS-specific lines.
+      if [ `grep -c "Check-$OS" $c.cc` -gt 0 ]
+      then
+        $FILE_CHECK $c.cc --check-prefix="Check-$OS" < $TMP_ASAN_REPORT
+      fi
+      rm ./$exe
+      rm ./$TMP_ASAN_REPORT
+      [ -e "$so" ] && rm ./$so
+    done
+  done
+done
+
+exit 0
diff --git a/lib/asan/output_tests/use-after-free.c b/lib/asan/output_tests/use-after-free.c
new file mode 100644
index 0000000..801d3f6
--- /dev/null
+++ b/lib/asan/output_tests/use-after-free.c
@@ -0,0 +1,9 @@
+#include <stdlib.h>
+int main() {
+  char *x = (char*)malloc(10 * sizeof(char));
+  free(x);
+  return x[5];
+}
+
+// CHECK: heap-use-after-free
+// CHECKSLEEP: Sleeping for 1 second
diff --git a/lib/asan/output_tests/use-after-free.cc b/lib/asan/output_tests/use-after-free.cc
new file mode 100644
index 0000000..a782d62
--- /dev/null
+++ b/lib/asan/output_tests/use-after-free.cc
@@ -0,0 +1,31 @@
+#include <stdlib.h>
+int main() {
+  char *x = (char*)malloc(10 * sizeof(char));
+  free(x);
+  return x[5];
+}
+
+// Check-Common: {{.*ERROR: AddressSanitizer heap-use-after-free on address}}
+// Check-Common:   {{0x.* at pc 0x.* bp 0x.* sp 0x.*}}
+// Check-Common: {{READ of size 1 at 0x.* thread T0}}
+// Check-Common: {{    #0 0x.* in main .*use-after-free.cc:5}}
+// Check-Common: {{0x.* is located 5 bytes inside of 10-byte region .0x.*,0x.*}}
+// Check-Common: {{freed by thread T0 here:}}
+
+// Check-Linux: {{    #0 0x.* in free}}
+// Check-Linux: {{    #1 0x.* in main .*use-after-free.cc:[45]}}
+
+// Check-Darwin: {{    #0 0x.* in .*mz_free.*}}
+// We override free() on Darwin, thus no malloc_zone_free
+// Check-Darwin: {{    #1 0x.* in free}}
+// Check-Darwin: {{    #2 0x.* in main .*use-after-free.cc:[45]}}
+
+// Check-Common: {{previously allocated by thread T0 here:}}
+
+// Check-Linux: {{    #0 0x.* in malloc}}
+// Check-Linux: {{    #1 0x.* in main .*use-after-free.cc:3}}
+
+// Check-Darwin: {{    #0 0x.* in .*mz_malloc.*}}
+// Check-Darwin: {{    #1 0x.* in malloc_zone_malloc.*}}
+// Check-Darwin: {{    #2 0x.* in malloc.*}}
+// Check-Darwin: {{    #3 0x.* in main .*use-after-free.cc:3}}
diff --git a/lib/asan/scripts/asan_symbolize.py b/lib/asan/scripts/asan_symbolize.py
new file mode 100755
index 0000000..357593d
--- /dev/null
+++ b/lib/asan/scripts/asan_symbolize.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+import os
+import re
+import sys
+import string
+import subprocess
+
+pipes = {}
+filetypes = {}
+DEBUG=False
+
+def patch_address(frameno, addr_s):
+  ''' Subtracts 1 from the top frame's address (frameno == '0'); on ARM the
+  Thumb bit is cleared first. Top frame is normally the return address from
+  asan_report* call, which is not expected to return at all. Because of that,
+  this address often belongs to the next source code line, or even to a
+  different function. Other frames are returned unchanged. '''
+  if frameno == '0':
+    addr = int(addr_s, 16)
+    if os.uname()[4].startswith('arm'):
+      # Cancel the Thumb bit
+      addr = addr & (~1)
+    addr -= 1
+    return hex(addr)
+  return addr_s
+
+
+def fix_filename(file_name):
+  for path_to_cut in sys.argv[1:]:
+    file_name = re.sub(".*" + path_to_cut, "", file_name)
+  file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name)
+  file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
+  return file_name
+
+
+# TODO(glider): need some refactoring here
+def symbolize_addr2line(line):
+  #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
+  match = re.match('^( *#([0-9]+) *0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
+  if match:
+    frameno = match.group(2)
+    binary = match.group(3)
+    addr = match.group(4)
+    addr = patch_address(frameno, addr)
+    if not pipes.has_key(binary):
+      pipes[binary] = subprocess.Popen(["addr2line", "-f", "-e", binary],
+                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    p = pipes[binary]
+    try:
+      print >>p.stdin, addr
+      function_name = p.stdout.readline().rstrip()
+      file_name     = p.stdout.readline().rstrip()
+    except:
+      function_name = ""
+      file_name = ""
+    file_name = fix_filename(file_name)
+
+    print match.group(1), "in", function_name, file_name
+  else:
+    print line.rstrip()
+
+
+def get_macho_filetype(binary):
+  # Cached Mach-O file type from `otool -Vh`; None if it is not recognized.
+  if not filetypes.has_key(binary):
+    otool_pipe = subprocess.Popen(["otool", "-Vh",  binary],
+      stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    otool_line = "".join(otool_pipe.stdout.readlines())
+    known = [t for t in ["DYLIB", "EXECUTE"] if t in otool_line]
+    filetypes[binary] = known[-1] if known else None  # was a KeyError below
+    otool_pipe.stdin.close()
+  return filetypes[binary]
+
+
+def symbolize_atos(line):
+  ''' Symbolizes one stack frame line via atos; prints other lines as is. '''
+  #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
+  match = re.match('^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
+  if match:
+    prefix = match.group(1)
+    frameno = match.group(2)
+    orig_addr = match.group(3)
+    binary = match.group(4)
+    offset = match.group(5)
+    addr = patch_address(frameno, orig_addr)
+    load_addr = hex(int(orig_addr, 16) - int(offset, 16))
+    filetype = get_macho_filetype(binary)
+
+    # A new atos process is started for every frame (see the TODO below), so
+    # the arch is guessed every time rather than only on a pipe-cache miss.
+    # 10 = len("0x") + 8 hex digits.
+    if len(addr) > 10:
+      arch = "x86_64"
+    else:
+      arch = "i386"
+
+    if filetype == "DYLIB":
+      load_addr = "0x0"
+    if DEBUG:
+      print "atos -o %s -arch %s -l %s" % (binary, arch, load_addr)
+    pipes[binary] = subprocess.Popen(["atos", "-o", binary, "-arch", arch, "-l", load_addr],
+                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,)
+    p = pipes[binary]
+    if filetype == "DYLIB":
+      print >>p.stdin, "%s" % offset
+    else:
+      print >>p.stdin, "%s" % addr
+    # TODO(glider): it's more efficient to make a batch atos run for each binary.
+    p.stdin.close()
+    atos_line = p.stdout.readline().rstrip()
+    # A well-formed atos response looks like this:
+    #   foo(type1, type2) (in object.name) (filename.cc:80)
+    match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
+    if match:
+      function_name = match.group(1)
+      function_name = re.sub("\(.*?\)", "", function_name)
+      file_name = fix_filename(match.group(3))
+      print "%s%s in %s %s" % (prefix, addr, function_name, file_name)
+    else:
+      print "%s%s in %s" % (prefix, addr, atos_line)
+    del pipes[binary]
+  else:
+    print line.rstrip()
+
+system = os.uname()[0]
+if system in ['Linux', 'Darwin']:
+  for line in sys.stdin:
+    if system == 'Linux':
+      symbolize_addr2line(line)
+    elif system == 'Darwin':
+      symbolize_atos(line)
+else:
+  print 'Unknown system: ', system
diff --git a/lib/asan/tests/asan_benchmarks_test.cc b/lib/asan/tests/asan_benchmarks_test.cc
new file mode 100644
index 0000000..b72cc3f
--- /dev/null
+++ b/lib/asan/tests/asan_benchmarks_test.cc
@@ -0,0 +1,86 @@
+//===-- asan_benchmarks_test.cc ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Some benchmarks for the instrumented code.
+//===----------------------------------------------------------------------===//
+
+#include "asan_test_config.h"
+#include "asan_test_utils.h"
+
+// Benchmark body: n_iter passes of writes over x, 16 elements per step.
+template<class T> __attribute__((noinline))
+static void ManyAccessFunc(T *x, size_t n_elements, size_t n_iter) {
+  for (size_t iter = 0; iter < n_iter; iter++) {
+    break_optimization(0);
+    // hand unroll the loop to stress the reg alloc.
+    for (size_t i = 0; i + 16 <= n_elements; i += 16) {  // no wrap if n < 16
+      x[i + 0] = i;
+      x[i + 1] = i;
+      x[i + 2] = i;
+      x[i + 3] = i;
+      x[i + 4] = i;
+      x[i + 5] = i;
+      x[i + 6] = i;
+      x[i + 7] = i;
+      x[i + 8] = i;
+      x[i + 9] = i;
+      x[i + 10] = i;
+      x[i + 11] = i;
+      x[i + 12] = i;
+      x[i + 13] = i;
+      x[i + 14] = i;
+      x[i + 15] = i;
+    }
+  }
+}
+
+TEST(AddressSanitizer, ManyAccessBenchmark) {
+  size_t kLen = 1024;
+  int *int_array = new int[kLen];
+  ManyAccessFunc(int_array, kLen, 1 << 24);
+  delete [] int_array;
+}
+
+// access 7 char elements in a 7 byte array (i.e. on the border).
+__attribute__((noinline))
+static void BorderAccessFunc(char *x, size_t n_iter) {
+  for (size_t iter = 0; iter < n_iter; iter++) {
+    break_optimization(x);
+    x[0] = 0;
+    x[1] = 0;
+    x[2] = 0;
+    x[3] = 0;
+    x[4] = 0;
+    x[5] = 0;
+    x[6] = 0;
+  }
+}
+
+TEST(AddressSanitizer, BorderAccessBenchmark) {
+  char *char_7_array = new char[7];
+  BorderAccessFunc(char_7_array, 1 << 30);
+  delete [] char_7_array;
+}
+
+static void FunctionWithLargeStack() {
+  int stack[1000];
+  Ident(stack);
+}
+
+TEST(AddressSanitizer, FakeStackBenchmark) {
+  for (int i = 0; i < 10000000; i++)
+    Ident(&FunctionWithLargeStack)();
+}
+
+int main(int argc, char **argv) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/lib/asan/tests/asan_break_optimization.cc b/lib/asan/tests/asan_break_optimization.cc
new file mode 100644
index 0000000..acd0427
--- /dev/null
+++ b/lib/asan/tests/asan_break_optimization.cc
@@ -0,0 +1,18 @@
+//===-- asan_break_optimization.cc ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+
+#include "asan_test_utils.h"
+// Have this function in a separate file to avoid inlining.
+// (Yes, we know about cross-file inlining, but let's assume we don't use it).
+extern "C" void break_optimization(void *x) {
+}
diff --git a/lib/asan/tests/asan_exceptions_test.cc b/lib/asan/tests/asan_exceptions_test.cc
new file mode 100644
index 0000000..ecd406d
--- /dev/null
+++ b/lib/asan/tests/asan_exceptions_test.cc
@@ -0,0 +1,27 @@
+// See http://llvm.org/bugs/show_bug.cgi?id=11468
+#include <stdio.h>
+#include <string>
+
+class Action {
+ public:
+  Action() {}
+  void PrintString(const std::string& msg) const {
+    fprintf(stderr, "%s\n", msg.c_str());
+  }
+  void Throw(const char& arg) const {
+    PrintString("PrintString called!");  // this line is important
+    throw arg;
+  }
+};
+
+int main() {
+  const Action a;
+  fprintf(stderr, "&a before = %p\n", &a);
+  try {
+    a.Throw('c');
+  } catch(const char&) {
+    fprintf(stderr, "&a in catch = %p\n", &a);
+  }
+  fprintf(stderr, "&a final = %p\n", &a);
+  return 0;
+}
diff --git a/lib/asan/tests/asan_globals_test.cc b/lib/asan/tests/asan_globals_test.cc
new file mode 100644
index 0000000..2303f8b
--- /dev/null
+++ b/lib/asan/tests/asan_globals_test.cc
@@ -0,0 +1,24 @@
+//===-- asan_globals_test.cc ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Some globals in a separate file.
+//===----------------------------------------------------------------------===//
+
+extern char glob5[5];
+static char static10[10];
+
+int GlobalsTest(int zero) {
+  static char func_static15[15];
+  glob5[zero] = 0;
+  static10[zero] = 0;
+  func_static15[zero] = 0;
+  return glob5[1] + func_static15[2];
+}
diff --git a/lib/asan/tests/asan_interface_test.cc b/lib/asan/tests/asan_interface_test.cc
new file mode 100644
index 0000000..40e6831
--- /dev/null
+++ b/lib/asan/tests/asan_interface_test.cc
@@ -0,0 +1,388 @@
+//===-- asan_interface_test.cc ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <vector>
+
+#include "asan_test_config.h"
+#include "asan_test_utils.h"
+#include "asan_interface.h"
+
+TEST(AddressSanitizerInterface, GetEstimatedAllocatedSize) {
+  EXPECT_EQ(1, __asan_get_estimated_allocated_size(0));
+  const size_t sizes[] = { 1, 30, 1<<30 };
+  // Non-zero sizes are expected back unchanged; bound follows the array.
+  for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
+    EXPECT_EQ(sizes[i], __asan_get_estimated_allocated_size(sizes[i]));
+}
+
+static const char* kGetAllocatedSizeErrorMsg =
+  "attempting to call __asan_get_allocated_size()";
+
+TEST(AddressSanitizerInterface, GetAllocatedSizeAndOwnershipTest) {
+  const size_t kArraySize = 100;
+  char *array = Ident((char*)malloc(kArraySize));
+  int *int_ptr = Ident(new int);
+
+  // Allocated memory is owned by allocator. Allocated size should be
+  // equal to requested size.
+  EXPECT_EQ(true, __asan_get_ownership(array));
+  EXPECT_EQ(kArraySize, __asan_get_allocated_size(array));
+  EXPECT_EQ(true, __asan_get_ownership(int_ptr));
+  EXPECT_EQ(sizeof(int), __asan_get_allocated_size(int_ptr));
+
+  // We cannot call GetAllocatedSize from the memory we didn't map,
+  // and from the interior pointers (not returned by previous malloc).
+  void *wild_addr = (void*)0x1;
+  EXPECT_EQ(false, __asan_get_ownership(wild_addr));
+  EXPECT_DEATH(__asan_get_allocated_size(wild_addr), kGetAllocatedSizeErrorMsg);
+  EXPECT_EQ(false, __asan_get_ownership(array + kArraySize / 2));
+  EXPECT_DEATH(__asan_get_allocated_size(array + kArraySize / 2),
+               kGetAllocatedSizeErrorMsg);
+
+  // NULL is not owned, but is a valid argument for __asan_get_allocated_size().
+  EXPECT_EQ(false, __asan_get_ownership(NULL));
+  EXPECT_EQ(0, __asan_get_allocated_size(NULL));
+
+  // When memory is freed, it's not owned, and call to GetAllocatedSize
+  // is forbidden.
+  free(array);
+  EXPECT_EQ(false, __asan_get_ownership(array));
+  EXPECT_DEATH(__asan_get_allocated_size(array), kGetAllocatedSizeErrorMsg);
+
+  delete int_ptr;
+}
+
+TEST(AddressSanitizerInterface, GetCurrentAllocatedBytesTest) {
+  size_t before_malloc, after_malloc, after_free;
+  char *array;
+  const size_t kMallocSize = 100;
+  before_malloc = __asan_get_current_allocated_bytes();
+
+  array = Ident((char*)malloc(kMallocSize));
+  after_malloc = __asan_get_current_allocated_bytes();
+  EXPECT_EQ(before_malloc + kMallocSize, after_malloc);
+
+  free(array);
+  after_free = __asan_get_current_allocated_bytes();
+  EXPECT_EQ(before_malloc, after_free);
+}
+
+static void DoDoubleFree() {
+  int *x = Ident(new int);
+  delete Ident(x);
+  delete Ident(x);
+}
+
+// This test is run in a separate process, so that the large malloced
+// chunk won't remain in the free lists after the test.
+// Note: use ASSERT_* instead of EXPECT_* here.
+static void RunGetHeapSizeTestAndDie() {
+  size_t old_heap_size, new_heap_size, heap_growth;
+  // We are unlikely to have a chunk of this size in the free list.
+  static const size_t kLargeMallocSize = 1 << 29;  // 512M
+  old_heap_size = __asan_get_heap_size();
+  fprintf(stderr, "allocating %zu bytes:\n", kLargeMallocSize);
+  free(Ident(malloc(kLargeMallocSize)));
+  new_heap_size = __asan_get_heap_size();
+  heap_growth = new_heap_size - old_heap_size;
+  fprintf(stderr, "heap growth after first malloc: %zu\n", heap_growth);
+  ASSERT_GE(heap_growth, kLargeMallocSize);
+  ASSERT_LE(heap_growth, 2 * kLargeMallocSize);
+
+  // Now the large chunk should fall into the free list, and can be
+  // allocated again without increasing the heap size.
+  old_heap_size = new_heap_size;
+  free(Ident(malloc(kLargeMallocSize)));
+  heap_growth = __asan_get_heap_size() - old_heap_size;
+  fprintf(stderr, "heap growth after second malloc: %zu\n", heap_growth);
+  ASSERT_LT(heap_growth, kLargeMallocSize);
+
+  // Test passed. Now die with the expected double-free.
+  DoDoubleFree();
+}
+
+TEST(AddressSanitizerInterface, GetHeapSizeTest) {
+  EXPECT_DEATH(RunGetHeapSizeTestAndDie(), "double-free");
+}
+
+// Note: use ASSERT_* instead of EXPECT_* here.
+static void DoLargeMallocForGetFreeBytesTestAndDie() {
+  size_t old_free_bytes, new_free_bytes;
+  static const size_t kLargeMallocSize = 1 << 29;  // 512M
+  // If we malloc and free a large memory chunk, it will not fall
+  // into quarantine and will be available for future requests.
+  old_free_bytes = __asan_get_free_bytes();
+  fprintf(stderr, "allocating %zu bytes:\n", kLargeMallocSize);
+  fprintf(stderr, "free bytes before malloc: %zu\n", old_free_bytes);
+  free(Ident(malloc(kLargeMallocSize)));
+  new_free_bytes = __asan_get_free_bytes();
+  fprintf(stderr, "free bytes after malloc and free: %zu\n", new_free_bytes);
+  ASSERT_GE(new_free_bytes, old_free_bytes + kLargeMallocSize);
+  // Test passed.
+  DoDoubleFree();
+}
+
+TEST(AddressSanitizerInterface, GetFreeBytesTest) {
+  static const size_t kNumOfChunks = 100;
+  static const size_t kChunkSize = 100;
+  char *chunks[kNumOfChunks];
+  size_t i;
+  size_t old_free_bytes, new_free_bytes;
+  // Allocate a small chunk. Now allocator probably has a lot of these
+  // chunks to fulfill future requests. So, future requests will decrease
+  // the number of free bytes.
+  chunks[0] = Ident((char*)malloc(kChunkSize));
+  old_free_bytes = __asan_get_free_bytes();
+  for (i = 1; i < kNumOfChunks; i++) {
+    chunks[i] = Ident((char*)malloc(kChunkSize));
+    new_free_bytes = __asan_get_free_bytes();
+    EXPECT_LT(new_free_bytes, old_free_bytes);
+    old_free_bytes = new_free_bytes;
+  }
+  // Deleting these chunks will move them to quarantine, number of free
+  // bytes won't increase.
+  for (i = 0; i < kNumOfChunks; i++) {
+    free(chunks[i]);
+    EXPECT_EQ(old_free_bytes, __asan_get_free_bytes());
+  }
+  EXPECT_DEATH(DoLargeMallocForGetFreeBytesTestAndDie(), "double-free");
+}
+
+static const size_t kManyThreadsMallocSizes[] = {5, 1UL<<10, 1UL<<20, 357};
+static const size_t kManyThreadsIterations = 250;
+static const size_t kManyThreadsNumThreads = 200;
+
+void *ManyThreadsWithStatsWorker(void *arg) {
+  for (size_t iter = 0; iter < kManyThreadsIterations; iter++) {
+    for (size_t size_index = 0; size_index < 4; size_index++) {
+      free(Ident(malloc(kManyThreadsMallocSizes[size_index])));
+    }
+  }
+  return 0;
+}
+
+TEST(AddressSanitizerInterface, ManyThreadsWithStatsStressTest) {
+  size_t before_test, after_test, i;
+  pthread_t threads[kManyThreadsNumThreads];
+  before_test = __asan_get_current_allocated_bytes();
+  for (i = 0; i < kManyThreadsNumThreads; i++) {
+    pthread_create(&threads[i], 0,
+                   (void* (*)(void *x))ManyThreadsWithStatsWorker, (void*)i);
+  }
+  for (i = 0; i < kManyThreadsNumThreads; i++) {
+    pthread_join(threads[i], 0);
+  }
+  after_test = __asan_get_current_allocated_bytes();
+  // ASan stats also reflect memory usage of internal ASan RTL structs,
+  // so we can't check for equality here.
+  EXPECT_LT(after_test, before_test + (1UL<<20));
+}
+
+TEST(AddressSanitizerInterface, ExitCode) {
+  int original_exit_code = __asan_set_error_exit_code(7);
+  EXPECT_EXIT(DoDoubleFree(), ::testing::ExitedWithCode(7), "");
+  EXPECT_EQ(7, __asan_set_error_exit_code(8));
+  EXPECT_EXIT(DoDoubleFree(), ::testing::ExitedWithCode(8), "");
+  EXPECT_EQ(8, __asan_set_error_exit_code(original_exit_code));
+  EXPECT_EXIT(DoDoubleFree(),
+              ::testing::ExitedWithCode(original_exit_code), "");
+}
+
+static void MyDeathCallback() {
+  fprintf(stderr, "MyDeathCallback\n");
+}
+
+TEST(AddressSanitizerInterface, DeathCallbackTest) {
+  __asan_set_death_callback(MyDeathCallback);
+  EXPECT_DEATH(DoDoubleFree(), "MyDeathCallback");
+  __asan_set_death_callback(NULL);
+}
+
+static const char* kUseAfterPoisonErrorMessage = "use-after-poison";
+
+#define ACCESS(ptr, offset) Ident(*(ptr + offset))
+
+#define DIE_ON_ACCESS(ptr, offset) \
+    EXPECT_DEATH(Ident(*(ptr + offset)), kUseAfterPoisonErrorMessage)
+
+TEST(AddressSanitizerInterface, SimplePoisonMemoryRegionTest) {
+  char *array = Ident((char*)malloc(120));
+  // poison array[40..80)
+  ASAN_POISON_MEMORY_REGION(array + 40, 40);
+  ACCESS(array, 39);
+  ACCESS(array, 80);
+  DIE_ON_ACCESS(array, 40);
+  DIE_ON_ACCESS(array, 60);
+  DIE_ON_ACCESS(array, 79);
+  ASAN_UNPOISON_MEMORY_REGION(array + 40, 40);
+  // access previously poisoned memory.
+  ACCESS(array, 40);
+  ACCESS(array, 79);
+  free(array);
+}
+
+TEST(AddressSanitizerInterface, OverlappingPoisonMemoryRegionTest) {
+  char *array = Ident((char*)malloc(120));
+  // Poison [0..40) and [80..120)
+  ASAN_POISON_MEMORY_REGION(array, 40);
+  ASAN_POISON_MEMORY_REGION(array + 80, 40);
+  DIE_ON_ACCESS(array, 20);
+  ACCESS(array, 60);
+  DIE_ON_ACCESS(array, 100);
+  // Poison whole array - [0..120)
+  ASAN_POISON_MEMORY_REGION(array, 120);
+  DIE_ON_ACCESS(array, 60);
+  // Unpoison [24..96)
+  ASAN_UNPOISON_MEMORY_REGION(array + 24, 72);
+  DIE_ON_ACCESS(array, 23);
+  ACCESS(array, 24);
+  ACCESS(array, 60);
+  ACCESS(array, 95);
+  DIE_ON_ACCESS(array, 96);
+  free(array);
+}
+
+TEST(AddressSanitizerInterface, PushAndPopWithPoisoningTest) {
+  // Vector of capacity 20
+  char *vec = Ident((char*)malloc(20));
+  ASAN_POISON_MEMORY_REGION(vec, 20);
+  for (size_t i = 0; i < 7; i++) {
+    // Simulate push_back.
+    ASAN_UNPOISON_MEMORY_REGION(vec + i, 1);
+    ACCESS(vec, i);
+    DIE_ON_ACCESS(vec, i + 1);
+  }
+  for (size_t i = 7; i > 0; i--) {
+    // Simulate pop_back.
+    ASAN_POISON_MEMORY_REGION(vec + i - 1, 1);
+    DIE_ON_ACCESS(vec, i - 1);
+    if (i > 1) ACCESS(vec, i - 2);
+  }
+  free(vec);
+}
+
+// Make sure that each aligned block of size "2^granularity" doesn't have
+// a "true" value before a "false" value.
+static void MakeShadowValid(bool *shadow, int length, int granularity) {
+  bool can_be_poisoned = true;
+  for (int i = length - 1; i >= 0; i--) {  // Walk from the last element down.
+    can_be_poisoned &= shadow[i];  // Turns false at the first "false" seen.
+    shadow[i] &= can_be_poisoned;  // Clears a "true" that precedes a "false".
+    if (i % (1 << granularity) == 0) {  // i is the first index of a block.
+      can_be_poisoned = true;  // Start fresh for the next (lower) block.
+    }
+  }
+}
+
+TEST(AddressSanitizerInterface, PoisoningStressTest) {
+  const size_t kSize = 24;  // 3 blocks of 2^3 bytes (see granularity below).
+  bool expected[kSize];
+  char *arr = Ident((char*)malloc(kSize));
+  for (size_t l1 = 0; l1 < kSize; l1++) {
+    for (size_t s1 = 1; l1 + s1 <= kSize; s1++) {
+      for (size_t l2 = 0; l2 < kSize; l2++) {
+        for (size_t s2 = 1; l2 + s2 <= kSize; s2++) {
+          // Poison [l1, l1+s1), [l2, l2+s2) and check result.
+          ASAN_UNPOISON_MEMORY_REGION(arr, kSize);
+          ASAN_POISON_MEMORY_REGION(arr + l1, s1);
+          ASAN_POISON_MEMORY_REGION(arr + l2, s2);
+          memset(expected, false, kSize);
+          memset(expected + l1, true, s1);
+          MakeShadowValid(expected, kSize, /*granularity*/ 3);
+          memset(expected + l2, true, s2);
+          MakeShadowValid(expected, kSize, /*granularity*/ 3);
+          for (size_t i = 0; i < kSize; i++) {
+            ASSERT_EQ(expected[i], __asan_address_is_poisoned(arr + i));
+          }
+          // Unpoison [l1, l1+s1) and [l2, l2+s2) and check result.
+          ASAN_POISON_MEMORY_REGION(arr, kSize);
+          ASAN_UNPOISON_MEMORY_REGION(arr + l1, s1);
+          ASAN_UNPOISON_MEMORY_REGION(arr + l2, s2);
+          memset(expected, true, kSize);
+          memset(expected + l1, false, s1);
+          MakeShadowValid(expected, kSize, /*granularity*/ 3);
+          memset(expected + l2, false, s2);
+          MakeShadowValid(expected, kSize, /*granularity*/ 3);
+          for (size_t i = 0; i < kSize; i++) {
+            ASSERT_EQ(expected[i], __asan_address_is_poisoned(arr + i));
+          }
+        }
+      }
+    }
+  }
+}
+
+static const char *kInvalidPoisonMessage = "invalid-poison-memory-range";
+static const char *kInvalidUnpoisonMessage = "invalid-unpoison-memory-range";
+
+TEST(AddressSanitizerInterface, DISABLED_InvalidPoisonAndUnpoisonCallsTest) {
+  char *array = Ident((char*)malloc(120));
+  ASAN_UNPOISON_MEMORY_REGION(array, 120);
+  // Try to unpoison not owned memory
+  EXPECT_DEATH(ASAN_UNPOISON_MEMORY_REGION(array, 121),
+               kInvalidUnpoisonMessage);
+  EXPECT_DEATH(ASAN_UNPOISON_MEMORY_REGION(array - 1, 120),
+               kInvalidUnpoisonMessage);
+
+  ASAN_POISON_MEMORY_REGION(array, 120);
+  // Try to poison not owned memory.
+  EXPECT_DEATH(ASAN_POISON_MEMORY_REGION(array, 121), kInvalidPoisonMessage);
+  EXPECT_DEATH(ASAN_POISON_MEMORY_REGION(array - 1, 120),
+               kInvalidPoisonMessage);
+  free(array);
+}
+
+static void ErrorReportCallbackOneToZ(const char *report) {
+  int len = strlen(report);
+  char *dup = (char*)malloc(len + 1);  // +1 for the '\0' strcpy() appends.
+  strcpy(dup, report);
+  for (int i = 0; i < len; i++) {  // Rewrite every '1' as 'Z'.
+    if (dup[i] == '1') dup[i] = 'Z';
+  }
+  write(2, dup, len);  // fd 2 is stderr; the terminator is not written.
+  free(dup);
+}
+
+TEST(AddressSanitizerInterface, SetErrorReportCallbackTest) {
+  __asan_set_error_report_callback(ErrorReportCallbackOneToZ);
+  char *array = Ident((char*)malloc(120));
+  EXPECT_DEATH(ACCESS(array, 120), "size Z");
+  __asan_set_error_report_callback(NULL);
+}
+
+#ifdef __linux__
+// http://code.google.com/p/address-sanitizer/issues/detail?id=51
+TEST(AddressSanitizerInterface, GetOwnershipStressTest) {
+  std::vector<char *> pointers;
+  std::vector<size_t> sizes;
+  const size_t kNumMallocs =
+      (__WORDSIZE <= 32 || ASAN_LOW_MEMORY) ? 1 << 10 : 1 << 14;
+  for (size_t i = 0; i < kNumMallocs; i++) {
+    size_t size = i * 100 + 1;
+    pointers.push_back((char*)malloc(size));
+    sizes.push_back(size);
+  }
+  for (size_t i = 0; i < 4000000; i++) {
+    EXPECT_FALSE(__asan_get_ownership(&pointers));
+    EXPECT_FALSE(__asan_get_ownership((void*)0x1234));
+    size_t idx = i % kNumMallocs;
+    EXPECT_TRUE(__asan_get_ownership(pointers[idx]));
+    EXPECT_EQ(sizes[idx], __asan_get_allocated_size(pointers[idx]));
+  }
+  for (size_t i = 0, n = pointers.size(); i < n; i++)
+    free(pointers[i]);
+}
+#endif  // __linux__
diff --git a/lib/asan/tests/asan_mac_test.h b/lib/asan/tests/asan_mac_test.h
new file mode 100644
index 0000000..4bb5636
--- /dev/null
+++ b/lib/asan/tests/asan_mac_test.h
@@ -0,0 +1,17 @@
+extern "C" {
+  void CFAllocatorDefaultDoubleFree();
+  void CFAllocatorSystemDefaultDoubleFree();
+  void CFAllocatorMallocDoubleFree();
+  void CFAllocatorMallocZoneDoubleFree();
+  void CallFreeOnWorkqueue(void *mem);
+  void TestGCDDispatchAsync();
+  void TestGCDDispatchSync();
+  void TestGCDReuseWqthreadsAsync();
+  void TestGCDReuseWqthreadsSync();
+  void TestGCDDispatchAfter();
+  void TestGCDInTSDDestructor();
+  void TestGCDSourceEvent();
+  void TestGCDSourceCancel();
+  void TestGCDGroupAsync();
+  void TestOOBNSObjects();
+}
diff --git a/lib/asan/tests/asan_mac_test.mm b/lib/asan/tests/asan_mac_test.mm
new file mode 100644
index 0000000..bbce4c5
--- /dev/null
+++ b/lib/asan/tests/asan_mac_test.mm
@@ -0,0 +1,225 @@
+// Mac OS X 10.6 or higher only.
+#include <dispatch/dispatch.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#import <CoreFoundation/CFBase.h>
+#import <Foundation/NSObject.h>
+
+void CFAllocatorDefaultDoubleFree() {
+  void *mem =  CFAllocatorAllocate(kCFAllocatorDefault, 5, 0);
+  CFAllocatorDeallocate(kCFAllocatorDefault, mem);
+  CFAllocatorDeallocate(kCFAllocatorDefault, mem);
+}
+
+void CFAllocatorSystemDefaultDoubleFree() {
+  void *mem =  CFAllocatorAllocate(kCFAllocatorSystemDefault, 5, 0);
+  CFAllocatorDeallocate(kCFAllocatorSystemDefault, mem);
+  CFAllocatorDeallocate(kCFAllocatorSystemDefault, mem);
+}
+
+void CFAllocatorMallocDoubleFree() {
+  void *mem =  CFAllocatorAllocate(kCFAllocatorMalloc, 5, 0);
+  CFAllocatorDeallocate(kCFAllocatorMalloc, mem);
+  CFAllocatorDeallocate(kCFAllocatorMalloc, mem);
+}
+
+void CFAllocatorMallocZoneDoubleFree() {
+  void *mem =  CFAllocatorAllocate(kCFAllocatorMallocZone, 5, 0);
+  CFAllocatorDeallocate(kCFAllocatorMallocZone, mem);
+  CFAllocatorDeallocate(kCFAllocatorMallocZone, mem);
+}
+
+__attribute__((noinline))
+void access_memory(char *a) {
+  *a = 0;
+}
+
+// Test the +load instrumentation.
+// Because the +load methods are invoked before anything else is initialized,
+// it makes little sense to wrap the code below into a gTest test case.
+// If AddressSanitizer doesn't instrument the +load method below correctly,
+// everything will just crash.
+
+char kStartupStr[] =
+    "If your test didn't crash, AddressSanitizer is instrumenting "
+    "the +load methods correctly.";
+
+@interface LoadSomething : NSObject {
+}
+@end
+
+@implementation LoadSomething
+
++(void) load {
+  // Bound by sizeof, not strlen(): access_memory() writes 0 into each byte.
+  for (size_t i = 0; i < sizeof(kStartupStr) - 1; i++)
+    access_memory(&kStartupStr[i]);  // make sure no optimizations occur.
+  // Don't print anything here not to interfere with the death tests.
+}
+
+@end
+
+void worker_do_alloc(int size) {  // Allocate, touch byte 0, free.
+  char * volatile mem = (char *)malloc(size);  // Cast needed in ObjC++.
+  mem[0] = 0; // Ok
+  free(mem);
+}
+
+void worker_do_crash(int size) {  // Writes one byte past a size-byte block.
+  char * volatile mem = (char *)malloc(size);  // Cast needed in ObjC++.
+  access_memory(&mem[size]);  // BOOM
+  free(mem);
+}
+
+// Tests for the Grand Central Dispatch. See
+// http://developer.apple.com/library/mac/#documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html
+// for the reference.
+
+void TestGCDDispatchAsync() {
+  dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
+  dispatch_block_t block = ^{ worker_do_crash(1024); };
+  // dispatch_async() runs the task on a worker thread that does not go through
+  // pthread_create(). We need to verify that AddressSanitizer notices that the
+  // thread has started.
+  dispatch_async(queue, block);
+  // TODO(glider): this is hacky. Need to wait for the worker instead.
+  sleep(1);
+}
+
+void TestGCDDispatchSync() {
+  dispatch_queue_t queue = dispatch_get_global_queue(2, 0);
+  dispatch_block_t block = ^{ worker_do_crash(1024); };
+  // dispatch_sync() runs the task on a worker thread that does not go through
+  // pthread_create(). We need to verify that AddressSanitizer notices that the
+  // thread has started.
+  dispatch_sync(queue, block);
+  // TODO(glider): this is hacky. Need to wait for the worker instead.
+  sleep(1);
+}
+
+// libdispatch spawns a rather small number of threads and reuses them. We need
+// to make sure AddressSanitizer handles the reusing correctly.
+void TestGCDReuseWqthreadsAsync() {
+  dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
+  dispatch_block_t block_alloc = ^{ worker_do_alloc(1024); };
+  dispatch_block_t block_crash = ^{ worker_do_crash(1024); };
+  for (int i = 0; i < 100; i++) {
+    dispatch_async(queue, block_alloc);
+  }
+  dispatch_async(queue, block_crash);
+  // TODO(glider): this is hacky. Need to wait for the workers instead.
+  sleep(1);
+}
+
+// Try to trigger abnormal behaviour of dispatch_sync() being unhandled by us.
+void TestGCDReuseWqthreadsSync() {
+  dispatch_queue_t queue[4];
+  queue[0] = dispatch_get_global_queue(2, 0);
+  queue[1] = dispatch_get_global_queue(0, 0);
+  queue[2] = dispatch_get_global_queue(-2, 0);
+  queue[3] = dispatch_queue_create("my_queue", NULL);
+  dispatch_block_t block_alloc = ^{ worker_do_alloc(1024); };
+  dispatch_block_t block_crash = ^{ worker_do_crash(1024); };
+  for (int i = 0; i < 1000; i++) {
+    dispatch_sync(queue[i % 4], block_alloc);
+  }
+  dispatch_sync(queue[3], block_crash);
+  // TODO(glider): this is hacky. Need to wait for the workers instead.
+  sleep(1);
+}
+
+void TestGCDDispatchAfter() {
+  dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
+  dispatch_block_t block_crash = ^{ worker_do_crash(1024); };
+  // Schedule the event one second from the current time.
+  dispatch_time_t milestone =
+      dispatch_time(DISPATCH_TIME_NOW, 1LL * NSEC_PER_SEC);
+  dispatch_after(milestone, queue, block_crash);
+  // Let's wait for a bit longer now.
+  // TODO(glider): this is still hacky.
+  sleep(2);
+}
+
+void worker_do_deallocate(void *ptr) {
+  free(ptr);
+}
+
+void CallFreeOnWorkqueue(void *tsd) {
+  dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
+  dispatch_block_t block_dealloc = ^{ worker_do_deallocate(tsd); };
+  dispatch_async(queue, block_dealloc);
+  // Do not wait for the worker to free the memory -- nobody is going to touch
+  // it.
+}
+
+void TestGCDSourceEvent() {
+  dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
+  dispatch_source_t timer =
+      dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, queue);
+  // Schedule the timer one second from the current time.
+  dispatch_time_t milestone =
+      dispatch_time(DISPATCH_TIME_NOW, 1LL * NSEC_PER_SEC);
+
+  dispatch_source_set_timer(timer, milestone, DISPATCH_TIME_FOREVER, 0);
+  char * volatile mem = (char *)malloc(10);  // Cast needed in ObjC++.
+  dispatch_source_set_event_handler(timer, ^{
+    access_memory(&mem[10]);  // One past the 10-byte allocation.
+  });
+  dispatch_resume(timer);
+  sleep(2);  // Sleep longer than the one-second timer delay.
+}
+
+void TestGCDSourceCancel() {
+  dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
+  dispatch_source_t timer =
+      dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, queue);
+  // Schedule the timer one second from the current time.
+  dispatch_time_t milestone =
+      dispatch_time(DISPATCH_TIME_NOW, 1LL * NSEC_PER_SEC);
+
+  dispatch_source_set_timer(timer, milestone, DISPATCH_TIME_FOREVER, 0);
+  char * volatile mem = (char *)malloc(10);  // Cast needed in ObjC++.
+  // Both dispatch_source_set_cancel_handler() and
+  // dispatch_source_set_event_handler() use dispatch_barrier_async_f().
+  // It's tricky to test dispatch_source_set_cancel_handler() separately,
+  // so we test both here.
+  dispatch_source_set_event_handler(timer, ^{
+    dispatch_source_cancel(timer);
+  });
+  dispatch_source_set_cancel_handler(timer, ^{
+    access_memory(&mem[10]);  // One past the 10-byte allocation.
+  });
+  dispatch_resume(timer);
+  sleep(2);  // Sleep longer than the one-second timer delay.
+}
+
+void TestGCDGroupAsync() {
+  dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
+  dispatch_group_t group = dispatch_group_create();
+  char * volatile mem = (char *)malloc(10);  // Cast needed in ObjC++.
+  dispatch_group_async(group, queue, ^{
+    access_memory(&mem[10]);  // One past the 10-byte allocation.
+  });
+  dispatch_group_wait(group, DISPATCH_TIME_FOREVER);
+}
+
+@interface FixedArray : NSObject {
+  int items[10];
+}
+@end
+
+@implementation FixedArray
+-(int) access: (int)index {
+  return items[index];
+}
+@end
+
+void TestOOBNSObjects() {
+  id anObject = [FixedArray new];
+  [anObject access:1];
+  [anObject access:11];
+  [anObject release];
+}
diff --git a/lib/asan/tests/asan_noinst_test.cc b/lib/asan/tests/asan_noinst_test.cc
new file mode 100644
index 0000000..204c0da
--- /dev/null
+++ b/lib/asan/tests/asan_noinst_test.cc
@@ -0,0 +1,329 @@
+//===-- asan_noinst_test.cc ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// This test file should be compiled w/o asan instrumentation.
+//===----------------------------------------------------------------------===//
+#include "asan_allocator.h"
+#include "asan_interface.h"
+#include "asan_internal.h"
+#include "asan_mapping.h"
+#include "asan_stack.h"
+#include "asan_test_utils.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vector>
+#include <algorithm>
+#include "gtest/gtest.h"
+
+// Simple stand-alone pseudorandom number generator.
+// Current algorithm is ANSI C linear congruential PRNG.
+static inline uint32_t my_rand(uint32_t* state) {
+  return (*state = *state * 1103515245 + 12345) >> 16;
+}
+
+static uint32_t global_seed = 0;
+
+
+TEST(AddressSanitizer, InternalSimpleDeathTest) {
+  EXPECT_DEATH(exit(1), "");
+}
+
+static void MallocStress(size_t n) {
+  uint32_t seed = my_rand(&global_seed);
+  __asan::AsanStackTrace stack1;
+  stack1.trace[0] = 0xa123;
+  stack1.trace[1] = 0xa456;
+  stack1.size = 2;
+
+  __asan::AsanStackTrace stack2;
+  stack2.trace[0] = 0xb123;
+  stack2.trace[1] = 0xb456;
+  stack2.size = 2;
+
+  __asan::AsanStackTrace stack3;
+  stack3.trace[0] = 0xc123;
+  stack3.trace[1] = 0xc456;
+  stack3.size = 2;
+
+  std::vector<void *> vec;
+  for (size_t i = 0; i < n; i++) {
+    if ((i % 3) == 0) {
+      if (vec.empty()) continue;
+      size_t idx = my_rand(&seed) % vec.size();
+      void *ptr = vec[idx];
+      vec[idx] = vec.back();
+      vec.pop_back();
+      __asan::asan_free(ptr, &stack1);
+    } else {
+      size_t size = my_rand(&seed) % 1000 + 1;
+      switch ((my_rand(&seed) % 128)) {
+        case 0: size += 1024; break;
+        case 1: size += 2048; break;
+        case 2: size += 4096; break;
+      }
+      size_t alignment = 1 << (my_rand(&seed) % 10 + 1);
+      char *ptr = (char*)__asan::asan_memalign(alignment, size, &stack2);
+      vec.push_back(ptr);
+      ptr[0] = 0;
+      ptr[size-1] = 0;
+      ptr[size/2] = 0;
+    }
+  }
+  for (size_t i = 0; i < vec.size(); i++)
+    __asan::asan_free(vec[i], &stack3);
+}
+
+
+TEST(AddressSanitizer, NoInstMallocTest) {
+#ifdef __arm__
+  MallocStress(300000);
+#else
+  MallocStress(1000000);
+#endif
+}
+
+static void PrintShadow(const char *tag, uintptr_t ptr, size_t size) {
+  fprintf(stderr, "%s shadow: %lx size % 3ld: ", tag, (long)ptr, (long)size);
+  uintptr_t prev_shadow = 0;
+  for (intptr_t i = -32; i < (intptr_t)size + 32; i++) {
+    uintptr_t shadow = __asan::MemToShadow(ptr + i);
+    if (i == 0 || i == (intptr_t)size)
+      fprintf(stderr, ".");
+    if (shadow != prev_shadow) {
+      prev_shadow = shadow;
+      fprintf(stderr, "%02x", (int)*(uint8_t*)shadow);
+    }
+  }
+  fprintf(stderr, "\n");
+}
+
+TEST(AddressSanitizer, DISABLED_InternalPrintShadow) {
+  for (size_t size = 1; size <= 513; size++) {
+    char *ptr = new char[size];
+    PrintShadow("m", (uintptr_t)ptr, size);
+    delete [] ptr;
+    PrintShadow("f", (uintptr_t)ptr, size);
+  }
+}
+
+static uintptr_t pc_array[] = {
+#if __WORDSIZE == 64
+  0x7effbf756068ULL,
+  0x7effbf75e5abULL,
+  0x7effc0625b7cULL,
+  0x7effc05b8997ULL,
+  0x7effbf990577ULL,
+  0x7effbf990c56ULL,
+  0x7effbf992f3cULL,
+  0x7effbf950c22ULL,
+  0x7effc036dba0ULL,
+  0x7effc03638a3ULL,
+  0x7effc035be4aULL,
+  0x7effc0539c45ULL,
+  0x7effc0539a65ULL,
+  0x7effc03db9b3ULL,
+  0x7effc03db100ULL,
+  0x7effc037c7b8ULL,
+  0x7effc037bfffULL,
+  0x7effc038b777ULL,
+  0x7effc038021cULL,
+  0x7effc037c7d1ULL,
+  0x7effc037bfffULL,
+  0x7effc038b777ULL,
+  0x7effc038021cULL,
+  0x7effc037c7d1ULL,
+  0x7effc037bfffULL,
+  0x7effc038b777ULL,
+  0x7effc038021cULL,
+  0x7effc037c7d1ULL,
+  0x7effc037bfffULL,
+  0x7effc0520d26ULL,
+  0x7effc009ddffULL,
+  0x7effbf90bb50ULL,
+  0x7effbdddfa69ULL,
+  0x7effbdde1fe2ULL,
+  0x7effbdde2424ULL,
+  0x7effbdde27b3ULL,
+  0x7effbddee53bULL,
+  0x7effbdde1988ULL,
+  0x7effbdde0904ULL,
+  0x7effc106ce0dULL,
+  0x7effbcc3fa04ULL,
+  0x7effbcc3f6a4ULL,
+  0x7effbcc3e726ULL,
+  0x7effbcc40852ULL,
+  0x7effb681ec4dULL,
+#endif  // __WORDSIZE
+  0xB0B5E768,
+  0x7B682EC1,
+  0x367F9918,
+  0xAE34E13,
+  0xBA0C6C6,
+  0x13250F46,
+  0xA0D6A8AB,
+  0x2B07C1A8,
+  0x6C844F4A,
+  0x2321B53,
+  0x1F3D4F8F,
+  0x3FE2924B,
+  0xB7A2F568,
+  0xBD23950A,
+  0x61020930,
+  0x33E7970C,
+  0x405998A1,
+  0x59F3551D,
+  0x350E3028,
+  0xBC55A28D,
+  0x361F3AED,
+  0xBEAD0F73,
+  0xAEF28479,
+  0x757E971F,
+  0xAEBA450,
+  0x43AD22F5,
+  0x8C2C50C4,
+  0x7AD8A2E1,
+  0x69EE4EE8,
+  0xC08DFF,
+  0x4BA6538,
+  0x3708AB2,
+  0xC24B6475,
+  0x7C8890D7,
+  0x6662495F,
+  0x9B641689,
+  0xD3596B,
+  0xA1049569,
+  0x44CBC16,
+  0x4D39C39F
+};
+
+void CompressStackTraceTest(size_t n_iter) {
+  uint32_t seed = my_rand(&global_seed);
+  const size_t kNumPcs = ASAN_ARRAY_SIZE(pc_array);
+  uint32_t compressed[2 * kNumPcs];
+
+  for (size_t iter = 0; iter < n_iter; iter++) {
+    std::random_shuffle(pc_array, pc_array + kNumPcs);
+    __asan::AsanStackTrace stack0, stack1;
+    stack0.CopyFrom(pc_array, kNumPcs);
+    stack0.size = std::max((size_t)1, (size_t)my_rand(&seed) % stack0.size);
+    size_t compress_size =
+      std::max((size_t)2, (size_t)my_rand(&seed) % (2 * kNumPcs));
+    size_t n_frames =
+      __asan::AsanStackTrace::CompressStack(&stack0, compressed, compress_size);
+    assert(n_frames <= stack0.size);
+    __asan::AsanStackTrace::UncompressStack(&stack1, compressed, compress_size);
+    assert(stack1.size == n_frames);
+    for (size_t i = 0; i < stack1.size; i++) {
+      assert(stack0.trace[i] == stack1.trace[i]);
+    }
+  }
+}
+
+TEST(AddressSanitizer, CompressStackTraceTest) {
+  CompressStackTraceTest(10000);
+}
+
+void CompressStackTraceBenchmark(size_t n_iter) {
+  const size_t kNumPcs = ASAN_ARRAY_SIZE(pc_array);
+  uint32_t compressed[2 * kNumPcs];
+  std::random_shuffle(pc_array, pc_array + kNumPcs);
+
+  __asan::AsanStackTrace stack0;
+  stack0.CopyFrom(pc_array, kNumPcs);
+  stack0.size = kNumPcs;
+  for (size_t iter = 0; iter < n_iter; iter++) {
+    size_t compress_size = kNumPcs;
+    size_t n_frames =
+      __asan::AsanStackTrace::CompressStack(&stack0, compressed, compress_size);
+    Ident(n_frames);
+  }
+}
+
+TEST(AddressSanitizer, CompressStackTraceBenchmark) {
+  CompressStackTraceBenchmark(1 << 24);
+}
+
+TEST(AddressSanitizer, QuarantineTest) {
+  __asan::AsanStackTrace stack;
+  stack.trace[0] = 0x890;
+  stack.size = 1;
+
+  const int size = 32;
+  void *p = __asan::asan_malloc(size, &stack);
+  __asan::asan_free(p, &stack);
+  size_t i;
+  size_t max_i = 1 << 30;
+  for (i = 0; i < max_i; i++) {
+    void *p1 = __asan::asan_malloc(size, &stack);
+    __asan::asan_free(p1, &stack);
+    if (p1 == p) break;
+  }
+  // fprintf(stderr, "i=%ld\n", i);
+  EXPECT_GE(i, 100000U);
+  EXPECT_LT(i, max_i);
+}
+
+void *ThreadedQuarantineTestWorker(void *unused) {
+  uint32_t seed = my_rand(&global_seed);
+  __asan::AsanStackTrace stack;
+  stack.trace[0] = 0x890;
+  stack.size = 1;
+
+  for (size_t i = 0; i < 1000; i++) {
+    void *p = __asan::asan_malloc(1 + (my_rand(&seed) % 4000), &stack);
+    __asan::asan_free(p, &stack);
+  }
+  return NULL;
+}
+
+// Check that the thread local allocators are flushed when threads are
+// destroyed.
+TEST(AddressSanitizer, ThreadedQuarantineTest) {
+  const int n_threads = 3000;
+  size_t mmaped1 = __asan_get_heap_size();
+  for (int i = 0; i < n_threads; i++) {
+    pthread_t t;
+    pthread_create(&t, NULL, ThreadedQuarantineTestWorker, 0);
+    pthread_join(t, 0);
+    size_t mmaped2 = __asan_get_heap_size();
+    EXPECT_LT(mmaped2 - mmaped1, 320U * (1 << 20));
+  }
+}
+
+void *ThreadedOneSizeMallocStress(void *unused) {
+  __asan::AsanStackTrace stack;
+  stack.trace[0] = 0x890;
+  stack.size = 1;
+  const size_t kNumMallocs = 1000;
+  for (int iter = 0; iter < 1000; iter++) {
+    void *p[kNumMallocs];
+    for (size_t i = 0; i < kNumMallocs; i++) {
+      p[i] = __asan::asan_malloc(32, &stack);
+    }
+    for (size_t i = 0; i < kNumMallocs; i++) {
+      __asan::asan_free(p[i], &stack);
+    }
+  }
+  return NULL;
+}
+
+TEST(AddressSanitizer, ThreadedOneSizeMallocStressTest) {
+  const int kNumThreads = 4;
+  pthread_t t[kNumThreads];
+  for (int i = 0; i < kNumThreads; i++) {
+    pthread_create(&t[i], 0, ThreadedOneSizeMallocStress, 0);
+  }
+  for (int i = 0; i < kNumThreads; i++) {
+    pthread_join(t[i], 0);
+  }
+}
diff --git a/lib/asan/tests/asan_racy_double_free_test.cc b/lib/asan/tests/asan_racy_double_free_test.cc
new file mode 100644
index 0000000..deeeb4f
--- /dev/null
+++ b/lib/asan/tests/asan_racy_double_free_test.cc
@@ -0,0 +1,32 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+const int N = 1000;
+void *x[N];
+
+void *Thread1(void *unused) {
+  for (int i = 0; i < N; i++) {
+    fprintf(stderr, "%s %d\n", __FUNCTION__, i);
+    free(x[i]);
+  }
+  return NULL;
+}
+
+void *Thread2(void *unused) {
+  for (int i = 0; i < N; i++) {
+    fprintf(stderr, "%s %d\n", __FUNCTION__, i);
+    free(x[i]);
+  }
+  return NULL;
+}
+
+int main() {
+  for (int i = 0; i < N; i++)
+    x[i] = malloc(128);
+  pthread_t t[2];
+  pthread_create(&t[0], 0, Thread1, 0);
+  pthread_create(&t[1], 0, Thread2, 0);
+  pthread_join(t[0], 0);
+  pthread_join(t[1], 0);
+}
diff --git a/lib/asan/tests/asan_test.cc b/lib/asan/tests/asan_test.cc
new file mode 100644
index 0000000..3f251dd
--- /dev/null
+++ b/lib/asan/tests/asan_test.cc
@@ -0,0 +1,2015 @@
+//===-- asan_test.cc ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+#include <emmintrin.h>
+#endif
+
+#include "asan_test_config.h"
+#include "asan_test_utils.h"
+
+#ifndef __APPLE__
+#include <malloc.h>
+#else
+#include <AvailabilityMacros.h>  // For MAC_OS_X_VERSION_*
+#include <CoreFoundation/CFString.h>
+#endif  // __APPLE__
+
+#ifdef __APPLE__
+static bool APPLE = true;
+#else
+static bool APPLE = false;
+#endif
+
+#if ASAN_HAS_EXCEPTIONS
+# define ASAN_THROW(x) throw (x)
+#else
+# define ASAN_THROW(x)
+#endif
+
+#include <sys/mman.h>
+
+typedef uint8_t   U1;
+typedef uint16_t  U2;
+typedef uint32_t  U4;
+typedef uint64_t  U8;
+
+static const char *progname;
+static const int kPageSize = 4096;
+
+// Simple stand-alone pseudorandom number generator.
+// Current algorithm is ANSI C linear congruential PRNG.
+static inline uint32_t my_rand(uint32_t* state) {
+  return (*state = *state * 1103515245 + 12345) >> 16;
+}
+
+static uint32_t global_seed = 0;
+
+const size_t kLargeMalloc = 1 << 24;
+
+template<typename T>
+NOINLINE void asan_write(T *a) {
+  *a = 0;
+}
+
+NOINLINE void asan_write_sized_aligned(uint8_t *p, size_t size) {
+  EXPECT_EQ(0, ((uintptr_t)p % size));
+  if      (size == 1) asan_write((uint8_t*)p);
+  else if (size == 2) asan_write((uint16_t*)p);
+  else if (size == 4) asan_write((uint32_t*)p);
+  else if (size == 8) asan_write((uint64_t*)p);
+}
+
+NOINLINE void *malloc_fff(size_t size) {
+  void *res = malloc/**/(size); break_optimization(0); return res;}
+NOINLINE void *malloc_eee(size_t size) {
+  void *res = malloc_fff(size); break_optimization(0); return res;}
+NOINLINE void *malloc_ddd(size_t size) {
+  void *res = malloc_eee(size); break_optimization(0); return res;}
+NOINLINE void *malloc_ccc(size_t size) {
+  void *res = malloc_ddd(size); break_optimization(0); return res;}
+NOINLINE void *malloc_bbb(size_t size) {
+  void *res = malloc_ccc(size); break_optimization(0); return res;}
+NOINLINE void *malloc_aaa(size_t size) {
+  void *res = malloc_bbb(size); break_optimization(0); return res;}
+
+#ifndef __APPLE__
+NOINLINE void *memalign_fff(size_t alignment, size_t size) {
+  void *res = memalign/**/(alignment, size); break_optimization(0); return res;}
+NOINLINE void *memalign_eee(size_t alignment, size_t size) {
+  void *res = memalign_fff(alignment, size); break_optimization(0); return res;}
+NOINLINE void *memalign_ddd(size_t alignment, size_t size) {
+  void *res = memalign_eee(alignment, size); break_optimization(0); return res;}
+NOINLINE void *memalign_ccc(size_t alignment, size_t size) {
+  void *res = memalign_ddd(alignment, size); break_optimization(0); return res;}
+NOINLINE void *memalign_bbb(size_t alignment, size_t size) {
+  void *res = memalign_ccc(alignment, size); break_optimization(0); return res;}
+NOINLINE void *memalign_aaa(size_t alignment, size_t size) {
+  void *res = memalign_bbb(alignment, size); break_optimization(0); return res;}
+#endif  // __APPLE__
+
+
+NOINLINE void free_ccc(void *p) { free(p); break_optimization(0);}
+NOINLINE void free_bbb(void *p) { free_ccc(p); break_optimization(0);}
+NOINLINE void free_aaa(void *p) { free_bbb(p); break_optimization(0);}
+
+template<typename T>
+NOINLINE void oob_test(int size, int off) {
+  char *p = (char*)malloc_aaa(size);
+  // fprintf(stderr, "writing %d byte(s) into [%p,%p) with offset %d\n",
+  //        sizeof(T), p, p + size, off);
+  asan_write((T*)(p + off));
+  free_aaa(p);
+}
+
+
+template<typename T>
+NOINLINE void uaf_test(int size, int off) {
+  char *p = (char *)malloc_aaa(size);
+  free_aaa(p);
+  for (int i = 1; i < 100; i++)
+    free_aaa(malloc_aaa(i));
+  fprintf(stderr, "writing %ld byte(s) at %p with offset %d\n",
+          (long)sizeof(T), p, off);
+  asan_write((T*)(p + off));
+}
+
+TEST(AddressSanitizer, HasFeatureAddressSanitizerTest) {
+#if defined(__has_feature) && __has_feature(address_sanitizer)
+  bool asan = 1;
+#else
+  bool asan = 0;
+#endif
+  EXPECT_EQ(true, asan);
+}
+
+TEST(AddressSanitizer, SimpleDeathTest) {
+  EXPECT_DEATH(exit(1), "");
+}
+
+TEST(AddressSanitizer, VariousMallocsTest) {
+  // fprintf(stderr, "malloc:\n");
+  int *a = (int*)malloc(100 * sizeof(int));
+  a[50] = 0;
+  free(a);
+
+  // fprintf(stderr, "realloc:\n");
+  int *r = (int*)malloc(10);
+  r = (int*)realloc(r, 2000 * sizeof(int));
+  r[1000] = 0;
+  free(r);
+
+  // fprintf(stderr, "operator new []\n");
+  int *b = new int[100];
+  b[50] = 0;
+  delete [] b;
+
+  // fprintf(stderr, "operator new\n");
+  int *c = new int;
+  *c = 0;
+  delete c;
+
+#if !defined(__APPLE__) && !defined(ANDROID)
+  // fprintf(stderr, "posix_memalign\n");
+  int *pm;
+  int pm_res = posix_memalign((void**)&pm, kPageSize, kPageSize);
+  EXPECT_EQ(0, pm_res);
+  free(pm);
+#endif
+
+#if !defined(__APPLE__)
+  int *ma = (int*)memalign(kPageSize, kPageSize);
+  EXPECT_EQ(0, (uintptr_t)ma % kPageSize);
+  ma[123] = 0;
+  free(ma);
+#endif  // __APPLE__
+}
+
+TEST(AddressSanitizer, CallocTest) {
+  int *a = (int*)calloc(100, sizeof(int));
+  EXPECT_EQ(0, a[10]);
+  free(a);
+}
+
+TEST(AddressSanitizer, VallocTest) {
+  void *a = valloc(100);
+  EXPECT_EQ(0, (uintptr_t)a % kPageSize);
+  free(a);
+}
+
+#ifndef __APPLE__
+TEST(AddressSanitizer, PvallocTest) {
+  char *a = (char*)pvalloc(kPageSize + 100);
+  EXPECT_EQ(0, (uintptr_t)a % kPageSize);
+  a[kPageSize + 101] = 1;  // we should not report an error here.
+  free(a);
+
+  a = (char*)pvalloc(0);  // pvalloc(0) should allocate at least one page.
+  EXPECT_EQ(0, (uintptr_t)a % kPageSize);
+  a[101] = 1;  // we should not report an error here.
+  free(a);
+}
+#endif  // __APPLE__
+
+void *TSDWorker(void *test_key) {
+  if (test_key) {
+    pthread_setspecific(*(pthread_key_t*)test_key, (void*)0xfeedface);
+  }
+  return NULL;
+}
+
+void TSDDestructor(void *tsd) {
+  // Spawning a thread will check that the current thread id is not -1.
+  pthread_t th;
+  pthread_create(&th, NULL, TSDWorker, NULL);
+  pthread_join(th, NULL);
+}
+
+// This test triggers the thread-specific data destruction fiasco which occurs
+// if we don't manage the TSD destructors ourselves. We create a new pthread
+// key with a non-NULL destructor which is likely to be put after the destructor
+// of AsanThread in the list of destructors.
+// In this case the TSD for AsanThread will be destroyed before TSDDestructor
+// is called for the child thread, and a CHECK will fail when we call
+// pthread_create() to spawn the grandchild.
+TEST(AddressSanitizer, DISABLED_TSDTest) {
+  pthread_t th;
+  pthread_key_t test_key;
+  pthread_key_create(&test_key, TSDDestructor);
+  pthread_create(&th, NULL, TSDWorker, &test_key);
+  pthread_join(th, NULL);
+  pthread_key_delete(test_key);
+}
+
+template<typename T>
+void OOBTest() {
+  char expected_str[100];
+  for (int size = sizeof(T); size < 20; size += 5) {
+    for (int i = -5; i < 0; i++) {
+      const char *str =
+          "is located.*%d byte.*to the left";
+      sprintf(expected_str, str, abs(i));
+      EXPECT_DEATH(oob_test<T>(size, i), expected_str);
+    }
+
+    for (int i = 0; i < size - sizeof(T) + 1; i++)
+      oob_test<T>(size, i);
+
+    for (int i = size - sizeof(T) + 1; i <= size + 3 * sizeof(T); i++) {
+      const char *str =
+          "is located.*%d byte.*to the right";
+      int off = i >= size ? (i - size) : 0;
+      // we don't catch unaligned partially OOB accesses.
+      if (i % sizeof(T)) continue;
+      sprintf(expected_str, str, off);
+      EXPECT_DEATH(oob_test<T>(size, i), expected_str);
+    }
+  }
+
+  EXPECT_DEATH(oob_test<T>(kLargeMalloc, -1),
+          "is located.*1 byte.*to the left");
+  EXPECT_DEATH(oob_test<T>(kLargeMalloc, kLargeMalloc),
+          "is located.*0 byte.*to the right");
+}
+
+// TODO(glider): the following tests are EXTREMELY slow on Darwin:
+//   AddressSanitizer.OOB_char (125503 ms)
+//   AddressSanitizer.OOB_int (126890 ms)
+//   AddressSanitizer.OOBRightTest (315605 ms)
+//   AddressSanitizer.SimpleStackTest (366559 ms)
+
+TEST(AddressSanitizer, OOB_char) {
+  OOBTest<U1>();
+}
+
+TEST(AddressSanitizer, OOB_int) {
+  OOBTest<U4>();
+}
+
+TEST(AddressSanitizer, OOBRightTest) {
+  for (size_t access_size = 1; access_size <= 8; access_size *= 2) {
+    for (size_t alloc_size = 1; alloc_size <= 8; alloc_size++) {
+      for (size_t offset = 0; offset <= 8; offset += access_size) {
+        void *p = malloc(alloc_size);
+        // allocated: [p, p + alloc_size)
+        // accessed:  [p + offset, p + offset + access_size)
+        uint8_t *addr = (uint8_t*)p + offset;
+        if (offset + access_size <= alloc_size) {
+          asan_write_sized_aligned(addr, access_size);
+        } else {
+          int outside_bytes = offset > alloc_size ? (offset - alloc_size) : 0;
+          const char *str =
+              "is located.%d *byte.*to the right";
+          char expected_str[100];
+          sprintf(expected_str, str, outside_bytes);
+          EXPECT_DEATH(asan_write_sized_aligned(addr, access_size),
+                       expected_str);
+        }
+        free(p);
+      }
+    }
+  }
+}
+
+TEST(AddressSanitizer, UAF_char) {
+  const char *uaf_string = "AddressSanitizer.*heap-use-after-free";
+  EXPECT_DEATH(uaf_test<U1>(1, 0), uaf_string);
+  EXPECT_DEATH(uaf_test<U1>(10, 0), uaf_string);
+  EXPECT_DEATH(uaf_test<U1>(10, 10), uaf_string);
+  EXPECT_DEATH(uaf_test<U1>(kLargeMalloc, 0), uaf_string);
+  EXPECT_DEATH(uaf_test<U1>(kLargeMalloc, kLargeMalloc / 2), uaf_string);
+}
+
+#if ASAN_HAS_BLACKLIST
+TEST(AddressSanitizer, IgnoreTest) {
+  int *x = Ident(new int);
+  delete Ident(x);
+  *x = 0;
+}
+#endif  // ASAN_HAS_BLACKLIST
+
+struct StructWithBitField {
+  int bf1:1;
+  int bf2:1;
+  int bf3:1;
+  int bf4:29;
+};
+
+TEST(AddressSanitizer, BitFieldPositiveTest) {
+  StructWithBitField *x = new StructWithBitField;
+  delete Ident(x);  // x dangles from here on.
+  EXPECT_DEATH(x->bf1 = 0, "use-after-free");  // Each bit-field store into
+  EXPECT_DEATH(x->bf2 = 0, "use-after-free");  // the freed object must be
+  EXPECT_DEATH(x->bf3 = 0, "use-after-free");  // reported, whatever the
+  EXPECT_DEATH(x->bf4 = 0, "use-after-free");  // field's width or offset.
+}
+
+struct StructWithBitFields_8_24 {
+  int a:8;
+  int b:24;
+};
+
+TEST(AddressSanitizer, BitFieldNegativeTest) {
+  StructWithBitFields_8_24 *x = Ident(new StructWithBitFields_8_24);
+  x->a = 0;
+  x->b = 0;
+  delete Ident(x);
+}
+
+TEST(AddressSanitizer, OutOfMemoryTest) {
+  size_t size = __WORDSIZE == 64 ? (size_t)(1ULL << 48) : (0xf0000000);
+  EXPECT_EQ(0, realloc(0, size));
+  EXPECT_EQ(0, realloc(0, ~Ident(0)));
+  EXPECT_EQ(0, malloc(size));
+  EXPECT_EQ(0, malloc(~Ident(0)));
+  EXPECT_EQ(0, calloc(1, size));
+  EXPECT_EQ(0, calloc(1, ~Ident(0)));
+}
+
+#if ASAN_NEEDS_SEGV
+TEST(AddressSanitizer, WildAddressTest) {
+  char *c = (char*)0x123;
+  EXPECT_DEATH(*c = 0, "AddressSanitizer crashed on unknown address");
+}
+#endif
+
+static void MallocStress(size_t n) {
+  uint32_t seed = my_rand(&global_seed);
+  for (size_t iter = 0; iter < 10; iter++) {
+    vector<void *> vec;
+    for (size_t i = 0; i < n; i++) {
+      if ((i % 3) == 0) {
+        if (vec.empty()) continue;
+        size_t idx = my_rand(&seed) % vec.size();
+        void *ptr = vec[idx];
+        vec[idx] = vec.back();
+        vec.pop_back();
+        free_aaa(ptr);
+      } else {
+        size_t size = my_rand(&seed) % 1000 + 1;
+#ifndef __APPLE__
+        size_t alignment = 1 << (my_rand(&seed) % 7 + 3);
+        char *ptr = (char*)memalign_aaa(alignment, size);
+#else
+        char *ptr = (char*) malloc_aaa(size);
+#endif
+        vec.push_back(ptr);
+        ptr[0] = 0;
+        ptr[size-1] = 0;
+        ptr[size/2] = 0;
+      }
+    }
+    for (size_t i = 0; i < vec.size(); i++)
+      free_aaa(vec[i]);
+  }
+}
+
+TEST(AddressSanitizer, MallocStressTest) {
+  MallocStress((ASAN_LOW_MEMORY) ? 20000 : 200000);
+}
+
+static void TestLargeMalloc(size_t size) {
+  char buff[1024];  // Pattern the death test looks for in the error report.
+  sprintf(buff, "is located 1 bytes to the left of %zu-byte", size);
+  EXPECT_DEATH(Ident((char*)malloc(size))[-1] = 0, buff);  // 1-byte underflow.
+}
+
+TEST(AddressSanitizer, LargeMallocTest) {
+  for (int i = 113; i < (1 << 28); i = i * 2 + 13) {
+    TestLargeMalloc(i);
+  }
+}
+
+#if ASAN_LOW_MEMORY != 1
+TEST(AddressSanitizer, HugeMallocTest) {
+#ifdef __APPLE__
+  // It was empirically found out that 1215 megabytes is the maximum amount of
+  // memory available to the process under AddressSanitizer on Darwin.
+  // (the libSystem malloc() allows allocating up to 2300 megabytes without
+  // ASan).
+  size_t n_megs = __WORDSIZE == 32 ? 1200 : 4100;
+#else
+  size_t n_megs = __WORDSIZE == 32 ? 2600 : 4100;
+#endif
+  TestLargeMalloc(n_megs << 20);
+}
+#endif
+
+// pthread entry point: MallocStress() itself has the wrong signature for one.
+static void *MallocStressThread(void *n) { MallocStress((size_t)n); return 0; }
+TEST(AddressSanitizer, ThreadedMallocStressTest) {
+  const int kNumThreads = 4;
+  const size_t kNumIterations = (ASAN_LOW_MEMORY) ? 10000 : 100000;
+  pthread_t t[kNumThreads];
+  // The iteration count travels to each worker inside the void* argument.
+  for (int i = 0; i < kNumThreads; i++)
+    pthread_create(&t[i], 0, MallocStressThread, (void*)kNumIterations);
+  for (int i = 0; i < kNumThreads; i++)
+    pthread_join(t[i], 0);
+}
+
+void *ManyThreadsWorker(void *a) {
+  for (int iter = 0; iter < 100; iter++) {
+    for (size_t size = 100; size < 2000; size *= 2) {
+      free(Ident(malloc(size)));
+    }
+  }
+  return 0;
+}
+
+TEST(AddressSanitizer, ManyThreadsTest) {
+  // 32-bit targets get far fewer threads than 64-bit ones.
+  const size_t kNumThreads = __WORDSIZE == 32 ? 30 : 1000;
+  pthread_t t[kNumThreads];
+  // Start every worker; the index is passed along but the worker ignores it.
+  for (size_t idx = 0; idx < kNumThreads; idx++)
+    pthread_create(&t[idx], 0, ManyThreadsWorker, (void*)idx);
+  for (size_t idx = 0; idx < kNumThreads; idx++)
+    pthread_join(t[idx], 0);
+}
+
+TEST(AddressSanitizer, ReallocTest) {
+  const int kMinElem = 5;
+  int *ptr = (int*)malloc(sizeof(int) * kMinElem);
+  ptr[3] = 3;
+  for (int i = 0; i < 10000; i++) {
+    ptr = (int*)realloc(ptr,
+        (my_rand(&global_seed) % 1000 + kMinElem) * sizeof(int));
+    EXPECT_EQ(3, ptr[3]);
+  }
+}
+
+#ifndef __APPLE__
+static const char *kMallocUsableSizeErrorMsg =
+  "AddressSanitizer attempting to call malloc_usable_size()";
+
+TEST(AddressSanitizer, MallocUsableSizeTest) {
+  const size_t kArraySize = 100;
+  char *array = Ident((char*)malloc(kArraySize));
+  int *int_ptr = Ident(new int);
+  EXPECT_EQ(0, malloc_usable_size(NULL));
+  EXPECT_EQ(kArraySize, malloc_usable_size(array));
+  EXPECT_EQ(sizeof(int), malloc_usable_size(int_ptr));
+  EXPECT_DEATH(malloc_usable_size((void*)0x123), kMallocUsableSizeErrorMsg);
+  EXPECT_DEATH(malloc_usable_size(array + kArraySize / 2),
+               kMallocUsableSizeErrorMsg);
+  free(array);
+  EXPECT_DEATH(malloc_usable_size(array), kMallocUsableSizeErrorMsg);
+}
+#endif
+
+void WrongFree() {
+  int *x = (int*)malloc(100 * sizeof(int));
+  // Use the allocated memory, otherwise Clang will optimize it out.
+  Ident(x);
+  free(x + 1);
+}
+
+TEST(AddressSanitizer, WrongFreeTest) {
+  EXPECT_DEATH(WrongFree(),
+               "ERROR: AddressSanitizer attempting free.*not malloc");
+}
+
+void DoubleFree() {
+  int *x = (int*)malloc(100 * sizeof(int));
+  fprintf(stderr, "DoubleFree: x=%p\n", x);
+  free(x);
+  free(x);
+  fprintf(stderr, "should have failed in the second free(%p)\n", x);
+  abort();
+}
+
+TEST(AddressSanitizer, DoubleFreeTest) {
+  EXPECT_DEATH(DoubleFree(), ASAN_PCRE_DOTALL
+               "ERROR: AddressSanitizer attempting double-free"
+               ".*is located 0 bytes inside of 400-byte region"
+               ".*freed by thread T0 here"
+               ".*previously allocated by thread T0 here");
+}
+
+template<int kSize>
+NOINLINE void SizedStackTest() {
+  char a[kSize];
+  char  *A = Ident((char*)&a);
+  for (size_t i = 0; i < kSize; i++)
+    A[i] = i;
+  EXPECT_DEATH(A[-1] = 0, "");
+  EXPECT_DEATH(A[-20] = 0, "");
+  EXPECT_DEATH(A[-31] = 0, "");
+  EXPECT_DEATH(A[kSize] = 0, "");
+  EXPECT_DEATH(A[kSize + 1] = 0, "");
+  EXPECT_DEATH(A[kSize + 10] = 0, "");
+  EXPECT_DEATH(A[kSize + 31] = 0, "");
+}
+
+TEST(AddressSanitizer, SimpleStackTest) {
+  SizedStackTest<1>();
+  SizedStackTest<2>();
+  SizedStackTest<3>();
+  SizedStackTest<4>();
+  SizedStackTest<5>();
+  SizedStackTest<6>();
+  SizedStackTest<7>();
+  SizedStackTest<16>();
+  SizedStackTest<25>();
+  SizedStackTest<34>();
+  SizedStackTest<43>();
+  SizedStackTest<51>();
+  SizedStackTest<62>();
+  SizedStackTest<64>();
+  SizedStackTest<128>();
+}
+
+TEST(AddressSanitizer, ManyStackObjectsTest) {
+  char XXX[10];
+  char YYY[20];
+  char ZZZ[30];
+  Ident(XXX);
+  Ident(YYY);
+  EXPECT_DEATH(Ident(ZZZ)[-1] = 0, ASAN_PCRE_DOTALL "XXX.*YYY.*ZZZ");
+}
+
+NOINLINE static void Frame0(int frame, char *a, char *b, char *c) {
+  char d[4] = {0};
+  char *D = Ident(d);
+  switch (frame) {
+    case 3: a[5]++; break;
+    case 2: b[5]++; break;
+    case 1: c[5]++; break;
+    case 0: D[5]++; break;
+  }
+}
+NOINLINE static void Frame1(int frame, char *a, char *b) {
+  char c[4] = {0}; Frame0(frame, a, b, c);
+  break_optimization(0);
+}
+NOINLINE static void Frame2(int frame, char *a) {
+  char b[4] = {0}; Frame1(frame, a, b);
+  break_optimization(0);
+}
+NOINLINE static void Frame3(int frame) {
+  char a[4] = {0}; Frame2(frame, a);
+  break_optimization(0);
+}
+
+TEST(AddressSanitizer, GuiltyStackFrame0Test) {
+  EXPECT_DEATH(Frame3(0), "located .*in frame <.*Frame0");
+}
+TEST(AddressSanitizer, GuiltyStackFrame1Test) {
+  EXPECT_DEATH(Frame3(1), "located .*in frame <.*Frame1");
+}
+TEST(AddressSanitizer, GuiltyStackFrame2Test) {
+  EXPECT_DEATH(Frame3(2), "located .*in frame <.*Frame2");
+}
+TEST(AddressSanitizer, GuiltyStackFrame3Test) {
+  EXPECT_DEATH(Frame3(3), "located .*in frame <.*Frame3");
+}
+
+NOINLINE void LongJmpFunc1(jmp_buf buf) {
+  // create three red zones for these two stack objects.
+  int a;
+  int b;
+
+  int *A = Ident(&a);
+  int *B = Ident(&b);
+  *A = *B;
+  longjmp(buf, 1);
+}
+
+NOINLINE void UnderscopeLongJmpFunc1(jmp_buf buf) {
+  // create three red zones for these two stack objects.
+  int a;
+  int b;
+
+  int *A = Ident(&a);
+  int *B = Ident(&b);
+  *A = *B;
+  _longjmp(buf, 1);
+}
+
+NOINLINE void SigLongJmpFunc1(sigjmp_buf buf) {
+  // create three red zones for these two stack objects.
+  int a;
+  int b;
+
+  int *A = Ident(&a);
+  int *B = Ident(&b);
+  *A = *B;
+  siglongjmp(buf, 1);
+}
+
+
+NOINLINE void TouchStackFunc() {
+  int a[100];  // long array will intersect with redzones from LongJmpFunc1.
+  int *A = Ident(a);
+  for (int i = 0; i < 100; i++)
+    A[i] = i*i;
+}
+
+// Test that we handle longjmp and do not report false positives on stack.
+TEST(AddressSanitizer, LongJmpTest) {
+  static jmp_buf buf;
+  if (!setjmp(buf)) {
+    LongJmpFunc1(buf);
+  } else {
+    TouchStackFunc();
+  }
+}
+
+TEST(AddressSanitizer, UnderscopeLongJmpTest) {
+  static jmp_buf buf;
+  if (!_setjmp(buf)) {
+    UnderscopeLongJmpFunc1(buf);
+  } else {
+    TouchStackFunc();
+  }
+}
+
+TEST(AddressSanitizer, SigLongJmpTest) {
+  static sigjmp_buf buf;
+  if (!sigsetjmp(buf, 1)) {
+    SigLongJmpFunc1(buf);
+  } else {
+    TouchStackFunc();
+  }
+}
+
+#ifdef __EXCEPTIONS
+NOINLINE void ThrowFunc() {
+  // create three red zones for these two stack objects.
+  int a;
+  int b;
+
+  int *A = Ident(&a);
+  int *B = Ident(&b);
+  *A = *B;
+  ASAN_THROW(1);
+}
+
+TEST(AddressSanitizer, CxxExceptionTest) {
+  if (ASAN_UAR) return;
+  // TODO(kcc): this test crashes on 32-bit for some reason...
+  if (__WORDSIZE == 32) return;
+  try {
+    ThrowFunc();
+  } catch(...) {}
+  TouchStackFunc();
+}
+#endif
+
+void *ThreadStackReuseFunc1(void *unused) {
+  // create three red zones for these two stack objects.
+  int a;
+  int b;
+
+  int *A = Ident(&a);
+  int *B = Ident(&b);
+  *A = *B;
+  pthread_exit(0);
+  return 0;
+}
+
+void *ThreadStackReuseFunc2(void *unused) {
+  TouchStackFunc();
+  return 0;
+}
+
+TEST(AddressSanitizer, ThreadStackReuseTest) {
+  pthread_t t;
+  pthread_create(&t, 0, ThreadStackReuseFunc1, 0);
+  pthread_join(t, 0);
+  pthread_create(&t, 0, ThreadStackReuseFunc2, 0);
+  pthread_join(t, 0);
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+TEST(AddressSanitizer, Store128Test) {
+  char *a = Ident((char*)malloc(Ident(12)));
+  char *p = a;
+  if (((uintptr_t)a % 16) != 0)
+    p = a + 8;
+  assert(((uintptr_t)p % 16) == 0);
+  __m128i value_wide = _mm_set1_epi16(0x1234);
+  EXPECT_DEATH(_mm_store_si128((__m128i*)p, value_wide),
+               "AddressSanitizer heap-buffer-overflow");
+  EXPECT_DEATH(_mm_store_si128((__m128i*)p, value_wide),
+               "WRITE of size 16");
+  EXPECT_DEATH(_mm_store_si128((__m128i*)p, value_wide),
+               "located 0 bytes to the right of 12-byte");
+  free(a);
+}
+#endif
+
+static string RightOOBErrorMessage(int oob_distance) {
+  assert(oob_distance >= 0);
+  char expected_str[100];
+  sprintf(expected_str, "located %d bytes to the right", oob_distance);
+  return string(expected_str);
+}
+
+static string LeftOOBErrorMessage(int oob_distance) {
+  assert(oob_distance > 0);
+  char expected_str[100];
+  sprintf(expected_str, "located %d bytes to the left", oob_distance);
+  return string(expected_str);
+}
+
+template<typename T>
+void MemSetOOBTestTemplate(size_t length) {
+  if (length == 0) return;
+  size_t size = Ident(sizeof(T) * length);
+  T *array = Ident((T*)malloc(size));
+  int element = Ident(42);
+  int zero = Ident(0);
+  // memset interval inside array
+  memset(array, element, size);
+  memset(array, element, size - 1);
+  memset(array + length - 1, element, sizeof(T));
+  memset(array, element, 1);
+
+  // memset 0 bytes
+  memset(array - 10, element, zero);
+  memset(array - 1, element, zero);
+  memset(array, element, zero);
+  memset(array + length, 0, zero);
+  memset(array + length + 1, 0, zero);
+
+  // try to memset bytes to the right of array
+  EXPECT_DEATH(memset(array, 0, size + 1),
+               RightOOBErrorMessage(0));
+  EXPECT_DEATH(memset((char*)(array + length) - 1, element, 6),
+               RightOOBErrorMessage(4));
+  EXPECT_DEATH(memset(array + 1, element, size + sizeof(T)),
+               RightOOBErrorMessage(2 * sizeof(T) - 1));
+  // whole interval is to the right
+  EXPECT_DEATH(memset(array + length + 1, 0, 10),
+               RightOOBErrorMessage(sizeof(T)));
+
+  // try to memset bytes to the left of array
+  EXPECT_DEATH(memset((char*)array - 1, element, size),
+               LeftOOBErrorMessage(1));
+  EXPECT_DEATH(memset((char*)array - 5, 0, 6),
+               LeftOOBErrorMessage(5));
+  EXPECT_DEATH(memset(array - 5, element, size + 5 * sizeof(T)),
+               LeftOOBErrorMessage(5 * sizeof(T)));
+  // whole interval is to the left
+  EXPECT_DEATH(memset(array - 2, 0, sizeof(T)),
+               LeftOOBErrorMessage(2 * sizeof(T)));
+
+  // try to memset bytes both to the left & to the right
+  EXPECT_DEATH(memset((char*)array - 2, element, size + 4),
+               LeftOOBErrorMessage(2));
+
+  free(array);
+}
+
+TEST(AddressSanitizer, MemSetOOBTest) {
+  MemSetOOBTestTemplate<char>(100);
+  MemSetOOBTestTemplate<int>(5);
+  MemSetOOBTestTemplate<double>(256);
+  // We can test arrays of structures/classes here, but what for?
+}
+
+// Same test for memcpy and memmove functions
+template <typename T, class M>
+void MemTransferOOBTestTemplate(size_t length) {
+  if (length == 0) return;
+  size_t size = Ident(sizeof(T) * length);
+  T *src = Ident((T*)malloc(size));
+  T *dest = Ident((T*)malloc(size));
+  int zero = Ident(0);
+
+  // valid transfer of bytes between arrays
+  M::transfer(dest, src, size);
+  M::transfer(dest + 1, src, size - sizeof(T));
+  M::transfer(dest, src + length - 1, sizeof(T));
+  M::transfer(dest, src, 1);
+
+  // transfer zero bytes; every length goes through the Ident()-produced zero
+  M::transfer(dest - 1, src, zero);
+  M::transfer(dest + length, src, zero);
+  M::transfer(dest, src - 1, zero);
+  M::transfer(dest, src, zero);
+
+  // try to change mem to the right of dest
+  EXPECT_DEATH(M::transfer(dest + 1, src, size),
+               RightOOBErrorMessage(sizeof(T) - 1));
+  EXPECT_DEATH(M::transfer((char*)(dest + length) - 1, src, 5),
+               RightOOBErrorMessage(3));
+
+  // try to change mem to the left of dest
+  EXPECT_DEATH(M::transfer(dest - 2, src, size),
+               LeftOOBErrorMessage(2 * sizeof(T)));
+  EXPECT_DEATH(M::transfer((char*)dest - 3, src, 4),
+               LeftOOBErrorMessage(3));
+
+  // try to access mem to the right of src
+  EXPECT_DEATH(M::transfer(dest, src + 2, size),
+               RightOOBErrorMessage(2 * sizeof(T) - 1));
+  EXPECT_DEATH(M::transfer(dest, (char*)(src + length) - 3, 6),
+               RightOOBErrorMessage(2));
+
+  // try to access mem to the left of src
+  EXPECT_DEATH(M::transfer(dest, src - 1, size),
+               LeftOOBErrorMessage(sizeof(T)));
+  EXPECT_DEATH(M::transfer(dest, (char*)src - 6, 7),
+               LeftOOBErrorMessage(6));
+
+  // Generally we don't need to test cases where both accessing src and writing
+  // to dest address to poisoned memory.
+
+  T *big_src = Ident((T*)malloc(size * 2));
+  T *big_dest = Ident((T*)malloc(size * 2));
+  // try to change mem to both sides of dest
+  EXPECT_DEATH(M::transfer(dest - 1, big_src, size * 2),
+               LeftOOBErrorMessage(sizeof(T)));
+  // try to access mem to both sides of src
+  EXPECT_DEATH(M::transfer(big_dest, src - 2, size * 2),
+               LeftOOBErrorMessage(2 * sizeof(T)));
+
+  free(src);
+  free(dest);
+  free(big_src);
+  free(big_dest);
+}
+
+class MemCpyWrapper {
+ public:
+  static void* transfer(void *to, const void *from, size_t size) {
+    return memcpy(to, from, size);
+  }
+};
+TEST(AddressSanitizer, MemCpyOOBTest) {
+  MemTransferOOBTestTemplate<char, MemCpyWrapper>(100);
+  MemTransferOOBTestTemplate<int, MemCpyWrapper>(1024);
+}
+
+class MemMoveWrapper {
+ public:
+  static void* transfer(void *to, const void *from, size_t size) {
+    return memmove(to, from, size);
+  }
+};
+TEST(AddressSanitizer, MemMoveOOBTest) {
+  MemTransferOOBTestTemplate<char, MemMoveWrapper>(100);
+  MemTransferOOBTestTemplate<int, MemMoveWrapper>(1024);
+}
+
+// Tests for string functions
+
+// Used for string functions tests
+static char global_string[] = "global";
+static size_t global_string_length = 6;
+
+// Input to a test is a zero-terminated string str with given length
+// Accesses to the bytes to the left and to the right of str
+// are presumed to produce OOB errors
+void StrLenOOBTestTemplate(char *str, size_t length, bool is_global) {
+  // Normal strlen calls
+  EXPECT_EQ(strlen(str), length);
+  if (length > 0) {
+    EXPECT_EQ(strlen(str + 1), length - 1);
+    EXPECT_EQ(strlen(str + length), 0);
+  }
+  // Arg of strlen is not malloced, OOB access
+  if (!is_global) {
+    // We don't insert RedZones to the left of global variables
+    EXPECT_DEATH(Ident(strlen(str - 1)), LeftOOBErrorMessage(1));
+    EXPECT_DEATH(Ident(strlen(str - 5)), LeftOOBErrorMessage(5));
+  }
+  EXPECT_DEATH(Ident(strlen(str + length + 1)), RightOOBErrorMessage(0));
+  // Overwrite terminator
+  str[length] = 'a';
+  // String is not zero-terminated, strlen will lead to OOB access
+  EXPECT_DEATH(Ident(strlen(str)), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(strlen(str + length)), RightOOBErrorMessage(0));
+  // Restore terminator
+  str[length] = 0;
+}
+TEST(AddressSanitizer, StrLenOOBTest) {
+  // Build a heap string and a stack string of `length` characters each.
+  size_t length = Ident(10);
+  char *heap_string = Ident((char*)malloc(length + 1));
+  char stack_string[10 + 1];
+  for (size_t i = 0; i < length; i++) {
+    heap_string[i] = 'a';
+    stack_string[i] = 'b';
+  }
+  heap_string[length] = 0;
+  stack_string[length] = 0;
+  StrLenOOBTestTemplate(heap_string, length, false);
+  // TODO(samsonov): Fix expected messages in StrLenOOBTestTemplate to
+  //      make test for stack_string work. Or move it to output tests.
+  // StrLenOOBTestTemplate(stack_string, length, false);
+  StrLenOOBTestTemplate(global_string, global_string_length, true);
+  free(heap_string);
+}
+
+static inline char* MallocAndMemsetString(size_t size, char ch) {
+  char *s = Ident((char*)malloc(size));
+  memset(s, ch, size);
+  return s;
+}
+static inline char* MallocAndMemsetString(size_t size) {
+  return MallocAndMemsetString(size, 'z');
+}
+
+#ifndef __APPLE__
+TEST(AddressSanitizer, StrNLenOOBTest) {
+  size_t size = Ident(123);
+  char *str = MallocAndMemsetString(size);
+  // Normal strnlen calls.
+  Ident(strnlen(str - 1, 0));
+  Ident(strnlen(str, size));
+  Ident(strnlen(str + size - 1, 1));
+  str[size - 1] = '\0';
+  Ident(strnlen(str, 2 * size));
+  // Argument points to not allocated memory.
+  EXPECT_DEATH(Ident(strnlen(str - 1, 1)), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(strnlen(str + size, 1)), RightOOBErrorMessage(0));
+  // Overwrite the terminating '\0' and hit unallocated memory.
+  str[size - 1] = 'z';
+  EXPECT_DEATH(Ident(strnlen(str, size + 1)), RightOOBErrorMessage(0));
+  free(str);
+}
+#endif
+
+TEST(AddressSanitizer, StrDupOOBTest) {
+  size_t size = Ident(42);
+  char *str = MallocAndMemsetString(size);
+  char *new_str;
+  // Normal strdup calls.
+  str[size - 1] = '\0';
+  new_str = strdup(str);
+  free(new_str);
+  new_str = strdup(str + size - 1);
+  free(new_str);
+  // Argument points to not allocated memory.
+  EXPECT_DEATH(Ident(strdup(str - 1)), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(strdup(str + size)), RightOOBErrorMessage(0));
+  // Overwrite the terminating '\0' and hit unallocated memory.
+  str[size - 1] = 'z';
+  EXPECT_DEATH(Ident(strdup(str)), RightOOBErrorMessage(0));
+  free(str);
+}
+
+TEST(AddressSanitizer, StrCpyOOBTest) {
+  size_t to_size = Ident(30);
+  size_t from_size = Ident(6);  // less than to_size
+  char *to = Ident((char*)malloc(to_size));
+  char *from = Ident((char*)malloc(from_size));
+  // Normal strcpy calls.
+  strcpy(from, "hello");
+  strcpy(to, from);
+  strcpy(to + to_size - from_size, from);
+  // Length of "from" is too small.
+  EXPECT_DEATH(Ident(strcpy(from, "hello2")), RightOOBErrorMessage(0));
+  // "to" or "from" points to not allocated memory.
+  EXPECT_DEATH(Ident(strcpy(to - 1, from)), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(strcpy(to, from - 1)), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(strcpy(to, from + from_size)), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(strcpy(to + to_size, from)), RightOOBErrorMessage(0));
+  // Overwrite the terminating '\0' character and hit unallocated memory.
+  from[from_size - 1] = '!';
+  EXPECT_DEATH(Ident(strcpy(to, from)), RightOOBErrorMessage(0));
+  free(to);
+  free(from);
+}
+
+TEST(AddressSanitizer, StrNCpyOOBTest) {
+  size_t to_size = Ident(20);
+  size_t from_size = Ident(6);  // less than to_size
+  char *to = Ident((char*)malloc(to_size));
+  // From is a zero-terminated string "hello\0" of length 6
+  char *from = Ident((char*)malloc(from_size));
+  strcpy(from, "hello");
+  // copy 0 bytes
+  strncpy(to, from, 0);
+  strncpy(to - 1, from - 1, 0);
+  // normal strncpy calls
+  strncpy(to, from, from_size);
+  strncpy(to, from, to_size);
+  strncpy(to, from + from_size - 1, to_size);
+  strncpy(to + to_size - 1, from, 1);
+  // One of {to, from} points to not allocated memory
+  EXPECT_DEATH(Ident(strncpy(to, from - 1, from_size)),
+               LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(strncpy(to - 1, from, from_size)),
+               LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(strncpy(to, from + from_size, 1)),
+               RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(strncpy(to + to_size, from, 1)),
+               RightOOBErrorMessage(0));
+  // Length of "to" is too small
+  EXPECT_DEATH(Ident(strncpy(to + to_size - from_size + 1, from, from_size)),
+               RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(strncpy(to + 1, from, to_size)),
+               RightOOBErrorMessage(0));
+  // Overwrite terminator in from
+  from[from_size - 1] = '!';
+  // normal strncpy call
+  strncpy(to, from, from_size);
+  // Length of "from" is too small
+  EXPECT_DEATH(Ident(strncpy(to, from, to_size)),
+               RightOOBErrorMessage(0));
+  free(to);
+  free(from);
+}
+
+typedef char*(*PointerToStrChr)(const char*, int);
+void RunStrChrTest(PointerToStrChr StrChr) {
+  size_t size = Ident(100);
+  char *str = MallocAndMemsetString(size);
+  str[10] = 'q';
+  str[11] = '\0';
+  EXPECT_EQ(str, StrChr(str, 'z'));
+  EXPECT_EQ(str + 10, StrChr(str, 'q'));
+  EXPECT_EQ(NULL, StrChr(str, 'a'));
+  // StrChr argument points to not allocated memory.
+  EXPECT_DEATH(Ident(StrChr(str - 1, 'z')), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(StrChr(str + size, 'z')), RightOOBErrorMessage(0));
+  // Overwrite the terminator and hit not allocated memory.
+  str[11] = 'z';
+  EXPECT_DEATH(Ident(StrChr(str, 'a')), RightOOBErrorMessage(0));
+  free(str);
+}
+TEST(AddressSanitizer, StrChrAndIndexOOBTest) {
+  RunStrChrTest(&strchr);
+  RunStrChrTest(&index);
+}
+
+TEST(AddressSanitizer, StrCmpAndFriendsLogicTest) {
+  // strcmp
+  EXPECT_EQ(0, strcmp("", ""));
+  EXPECT_EQ(0, strcmp("abcd", "abcd"));
+  EXPECT_GT(0, strcmp("ab", "ac"));
+  EXPECT_GT(0, strcmp("abc", "abcd"));
+  EXPECT_LT(0, strcmp("acc", "abc"));
+  EXPECT_LT(0, strcmp("abcd", "abc"));
+
+  // strncmp
+  EXPECT_EQ(0, strncmp("a", "b", 0));
+  EXPECT_EQ(0, strncmp("abcd", "abcd", 10));
+  EXPECT_EQ(0, strncmp("abcd", "abcef", 3));
+  EXPECT_GT(0, strncmp("abcde", "abcfa", 4));
+  EXPECT_GT(0, strncmp("a", "b", 5));
+  EXPECT_GT(0, strncmp("bc", "bcde", 4));
+  EXPECT_LT(0, strncmp("xyz", "xyy", 10));
+  EXPECT_LT(0, strncmp("baa", "aaa", 1));
+  EXPECT_LT(0, strncmp("zyx", "", 2));
+
+  // strcasecmp
+  EXPECT_EQ(0, strcasecmp("", ""));
+  EXPECT_EQ(0, strcasecmp("zzz", "zzz"));
+  EXPECT_EQ(0, strcasecmp("abCD", "ABcd"));
+  EXPECT_GT(0, strcasecmp("aB", "Ac"));
+  EXPECT_GT(0, strcasecmp("ABC", "ABCd"));
+  EXPECT_LT(0, strcasecmp("acc", "abc"));
+  EXPECT_LT(0, strcasecmp("ABCd", "abc"));
+
+  // strncasecmp
+  EXPECT_EQ(0, strncasecmp("a", "b", 0));
+  EXPECT_EQ(0, strncasecmp("abCD", "ABcd", 10));
+  EXPECT_EQ(0, strncasecmp("abCd", "ABcef", 3));
+  EXPECT_GT(0, strncasecmp("abcde", "ABCfa", 4));
+  EXPECT_GT(0, strncasecmp("a", "B", 5));
+  EXPECT_GT(0, strncasecmp("bc", "BCde", 4));
+  EXPECT_LT(0, strncasecmp("xyz", "xyy", 10));
+  EXPECT_LT(0, strncasecmp("Baa", "aaa", 1));
+  EXPECT_LT(0, strncasecmp("zyx", "", 2));
+
+  // memcmp
+  EXPECT_EQ(0, memcmp("a", "b", 0));
+  EXPECT_EQ(0, memcmp("ab\0c", "ab\0c", 4));
+  EXPECT_GT(0, memcmp("\0ab", "\0ac", 3));
+  EXPECT_GT(0, memcmp("abb\0", "abba", 4));
+  EXPECT_LT(0, memcmp("ab\0cd", "ab\0c\0", 5));
+  EXPECT_LT(0, memcmp("zza", "zyx", 3));
+}
+
+typedef int(*PointerToStrCmp)(const char*, const char*);
+void RunStrCmpTest(PointerToStrCmp StrCmp) {
+  size_t size = Ident(100);
+  char *s1 = MallocAndMemsetString(size);
+  char *s2 = MallocAndMemsetString(size);
+  s1[size - 1] = '\0';
+  s2[size - 1] = '\0';
+  // Normal StrCmp calls
+  Ident(StrCmp(s1, s2));
+  Ident(StrCmp(s1, s2 + size - 1));
+  Ident(StrCmp(s1 + size - 1, s2 + size - 1));
+  s1[size - 1] = 'z';
+  s2[size - 1] = 'x';
+  Ident(StrCmp(s1, s2));
+  // One of arguments points to not allocated memory.
+  EXPECT_DEATH(Ident(StrCmp)(s1 - 1, s2), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(StrCmp)(s1, s2 - 1), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(StrCmp)(s1 + size, s2), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(StrCmp)(s1, s2 + size), RightOOBErrorMessage(0));
+  // Hit unallocated memory and die.
+  s2[size - 1] = 'z';
+  EXPECT_DEATH(Ident(StrCmp)(s1, s1), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(StrCmp)(s1 + size - 1, s2), RightOOBErrorMessage(0));
+  free(s1);
+  free(s2);
+}
+
+TEST(AddressSanitizer, StrCmpOOBTest) {
+  RunStrCmpTest(&strcmp);
+}
+
+TEST(AddressSanitizer, StrCaseCmpOOBTest) {
+  RunStrCmpTest(&strcasecmp);
+}
+
+typedef int(*PointerToStrNCmp)(const char*, const char*, size_t);
+void RunStrNCmpTest(PointerToStrNCmp StrNCmp) {
+  size_t size = Ident(100);
+  char *s1 = MallocAndMemsetString(size);
+  char *s2 = MallocAndMemsetString(size);
+  s1[size - 1] = '\0';
+  s2[size - 1] = '\0';
+  // Normal StrNCmp calls
+  Ident(StrNCmp(s1, s2, size + 2));
+  s1[size - 1] = 'z';
+  s2[size - 1] = 'x';
+  Ident(StrNCmp(s1 + size - 2, s2 + size - 2, size));
+  s2[size - 1] = 'z';
+  Ident(StrNCmp(s1 - 1, s2 - 1, 0));
+  Ident(StrNCmp(s1 + size - 1, s2 + size - 1, 1));
+  // One of arguments points to not allocated memory.
+  EXPECT_DEATH(Ident(StrNCmp)(s1 - 1, s2, 1), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(StrNCmp)(s1, s2 - 1, 1), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(StrNCmp)(s1 + size, s2, 1), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(StrNCmp)(s1, s2 + size, 1), RightOOBErrorMessage(0));
+  // Hit unallocated memory and die.
+  EXPECT_DEATH(Ident(StrNCmp)(s1 + 1, s2 + 1, size), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(StrNCmp)(s1 + size - 1, s2, 2), RightOOBErrorMessage(0));
+  free(s1);
+  free(s2);
+}
+
+TEST(AddressSanitizer, StrNCmpOOBTest) {
+  RunStrNCmpTest(&strncmp);
+}
+
+TEST(AddressSanitizer, StrNCaseCmpOOBTest) {
+  RunStrNCmpTest(&strncasecmp);
+}
+
+TEST(AddressSanitizer, MemCmpOOBTest) {
+  size_t size = Ident(100);
+  char *s1 = MallocAndMemsetString(size);
+  char *s2 = MallocAndMemsetString(size);
+  // Normal memcmp calls.
+  Ident(memcmp(s1, s2, size));
+  Ident(memcmp(s1 + size - 1, s2 + size - 1, 1));
+  Ident(memcmp(s1 - 1, s2 - 1, 0));
+  // One of arguments points to not allocated memory.
+  EXPECT_DEATH(Ident(memcmp)(s1 - 1, s2, 1), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(memcmp)(s1, s2 - 1, 1), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(Ident(memcmp)(s1 + size, s2, 1), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(memcmp)(s1, s2 + size, 1), RightOOBErrorMessage(0));
+  // Hit unallocated memory and die.
+  EXPECT_DEATH(Ident(memcmp)(s1 + 1, s2 + 1, size), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Ident(memcmp)(s1 + size - 1, s2, 2), RightOOBErrorMessage(0));
+  // Zero bytes are not terminators and don't prevent from OOB.
+  s1[size - 1] = '\0';
+  s2[size - 1] = '\0';
+  EXPECT_DEATH(Ident(memcmp)(s1, s2, size + 1), RightOOBErrorMessage(0));
+  free(s1);
+  free(s2);
+}
+
+TEST(AddressSanitizer, StrCatOOBTest) {
+  size_t to_size = Ident(100);
+  char *to = MallocAndMemsetString(to_size);
+  to[0] = '\0';
+  size_t from_size = Ident(20);
+  char *from = MallocAndMemsetString(from_size);
+  from[from_size - 1] = '\0';
+  // Normal strcat calls.
+  strcat(to, from);
+  strcat(to, from);
+  strcat(to + from_size, from + from_size - 2);
+  // Catenate empty string is not always an error.
+  strcat(to - 1, from + from_size - 1);
+  // One of arguments points to not allocated memory.
+  EXPECT_DEATH(strcat(to - 1, from), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(strcat(to, from - 1), LeftOOBErrorMessage(1));
+  EXPECT_DEATH(strcat(to + to_size, from), RightOOBErrorMessage(0));
+  EXPECT_DEATH(strcat(to, from + from_size), RightOOBErrorMessage(0));
+
+  // "from" is not zero-terminated.
+  from[from_size - 1] = 'z';
+  EXPECT_DEATH(strcat(to, from), RightOOBErrorMessage(0));
+  from[from_size - 1] = '\0';
+  // "to" is not zero-terminated.
+  memset(to, 'z', to_size);
+  EXPECT_DEATH(strcat(to, from), RightOOBErrorMessage(0));
+  // "to" is too short to fit "from".
+  to[to_size - from_size + 1] = '\0';
+  EXPECT_DEATH(strcat(to, from), RightOOBErrorMessage(0));
+  // length of "to" is just enough.
+  strcat(to, from + 1);
+}
+
+static string OverlapErrorMessage(const string &func) {
+  return func + "-param-overlap";
+}
+
+TEST(AddressSanitizer, StrArgsOverlapTest) {
+  size_t size = Ident(100);
+  char *str = Ident((char*)malloc(size));
+
+// Do not check memcpy() on OS X 10.7 and later, where it actually aliases
+// memmove().
+#if !defined(__APPLE__) || !defined(MAC_OS_X_VERSION_10_7) || \
+    (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7)
+  // Check "memcpy". Use Ident() to avoid inlining.
+  memset(str, 'z', size);
+  Ident(memcpy)(str + 1, str + 11, 10);
+  Ident(memcpy)(str, str, 0);
+  EXPECT_DEATH(Ident(memcpy)(str, str + 14, 15), OverlapErrorMessage("memcpy"));
+  EXPECT_DEATH(Ident(memcpy)(str + 14, str, 15), OverlapErrorMessage("memcpy"));
+#endif
+
+  // We do not treat memcpy with to==from as a bug.
+  // See http://llvm.org/bugs/show_bug.cgi?id=11763.
+  // EXPECT_DEATH(Ident(memcpy)(str + 20, str + 20, 1),
+  //              OverlapErrorMessage("memcpy"));
+
+  // Check "strcpy".
+  memset(str, 'z', size);
+  str[9] = '\0';
+  strcpy(str + 10, str);
+  EXPECT_DEATH(strcpy(str + 9, str), OverlapErrorMessage("strcpy"));
+  EXPECT_DEATH(strcpy(str, str + 4), OverlapErrorMessage("strcpy"));
+  strcpy(str, str + 5);
+
+  // Check "strncpy".
+  memset(str, 'z', size);
+  strncpy(str, str + 10, 10);
+  EXPECT_DEATH(strncpy(str, str + 9, 10), OverlapErrorMessage("strncpy"));
+  EXPECT_DEATH(strncpy(str + 9, str, 10), OverlapErrorMessage("strncpy"));
+  str[10] = '\0';
+  strncpy(str + 11, str, 20);
+  EXPECT_DEATH(strncpy(str + 10, str, 20), OverlapErrorMessage("strncpy"));
+
+  // Check "strcat".
+  memset(str, 'z', size);
+  str[10] = '\0';
+  str[20] = '\0';
+  strcat(str, str + 10);
+  strcat(str, str + 11);
+  str[10] = '\0';
+  strcat(str + 11, str);
+  EXPECT_DEATH(strcat(str, str + 9), OverlapErrorMessage("strcat"));
+  EXPECT_DEATH(strcat(str + 9, str), OverlapErrorMessage("strcat"));
+  EXPECT_DEATH(strcat(str + 10, str), OverlapErrorMessage("strcat"));
+
+  free(str);
+}
+
+void CallAtoi(const char *nptr) {
+  Ident(atoi(nptr));
+}
+void CallAtol(const char *nptr) {
+  Ident(atol(nptr));
+}
+void CallAtoll(const char *nptr) {
+  Ident(atoll(nptr));
+}
+typedef void(*PointerToCallAtoi)(const char*);
+
+void RunAtoiOOBTest(PointerToCallAtoi Atoi) {
+  char *array = MallocAndMemsetString(10, '1');
+  // A pointer that lies outside the buffer must be reported.
+  EXPECT_DEATH(Atoi(array + 11), RightOOBErrorMessage(1));
+  EXPECT_DEATH(Atoi(array - 1), LeftOOBErrorMessage(1));
+  // Die if the buffer doesn't have a terminating NUL.
+  EXPECT_DEATH(Atoi(array), RightOOBErrorMessage(0));
+  // Make the last character a terminating NUL or another non-digit.
+  array[9] = '\0';
+  Atoi(array);
+  array[9] = 'a';
+  Atoi(array);
+  Atoi(array + 9);
+  // Sometimes we need to detect overflow if no digits are found.
+  memset(array, ' ', 10);
+  EXPECT_DEATH(Atoi(array), RightOOBErrorMessage(0));
+  array[9] = '-';
+  EXPECT_DEATH(Atoi(array), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Atoi(array + 9), RightOOBErrorMessage(0));
+  array[8] = '-';
+  Atoi(array);
+  free(array);  // free(), not delete: matches the other MallocAndMemsetString users
+}
+
+TEST(AddressSanitizer, AtoiAndFriendsOOBTest) {
+  RunAtoiOOBTest(&CallAtoi);
+  RunAtoiOOBTest(&CallAtol);
+  RunAtoiOOBTest(&CallAtoll);
+}
+
+void CallStrtol(const char *nptr, char **endptr, int base) {
+  Ident(strtol(nptr, endptr, base));
+}
+void CallStrtoll(const char *nptr, char **endptr, int base) {
+  Ident(strtoll(nptr, endptr, base));
+}
+typedef void(*PointerToCallStrtol)(const char*, char**, int);
+
+void RunStrtolOOBTest(PointerToCallStrtol Strtol) {
+  char *array = MallocAndMemsetString(3);
+  char *endptr = NULL;
+  array[0] = '1';
+  array[1] = '2';
+  array[2] = '3';
+  // Invalid pointer to the string.
+  EXPECT_DEATH(Strtol(array + 3, NULL, 0), RightOOBErrorMessage(0));
+  EXPECT_DEATH(Strtol(array - 1, NULL, 0), LeftOOBErrorMessage(1));
+  // Buffer overflow if there is no terminating NUL (depends on base).
+  Strtol(array, &endptr, 3);  // '3' is not a base-3 digit, so parsing stops there
+  EXPECT_EQ(array + 2, endptr);
+  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBErrorMessage(0));
+  array[2] = 'z';
+  Strtol(array, &endptr, 35);  // 'z' is a digit only in base 36
+  EXPECT_EQ(array + 2, endptr);
+  EXPECT_DEATH(Strtol(array, NULL, 36), RightOOBErrorMessage(0));
+  // Add terminating zero to get rid of overflow.
+  array[2] = '\0';
+  Strtol(array, NULL, 36);
+  // Don't check for overflow if base is invalid.
+  Strtol(array - 1, NULL, -1);
+  Strtol(array + 3, NULL, 1);
+  // Sometimes we need to detect overflow if no digits are found.
+  array[0] = array[1] = array[2] = ' ';
+  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBErrorMessage(0));
+  array[2] = '+';
+  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBErrorMessage(0));
+  array[2] = '-';
+  EXPECT_DEATH(Strtol(array, NULL, 0), RightOOBErrorMessage(0));
+  array[1] = '+';
+  Strtol(array, NULL, 0);
+  array[1] = array[2] = 'z';
+  Strtol(array, &endptr, 0);
+  EXPECT_EQ(array, endptr);
+  Strtol(array + 2, NULL, 0);
+  EXPECT_EQ(array, endptr);
+  free(array);  // free(), not delete: matches the other MallocAndMemsetString users
+}
+
+TEST(AddressSanitizer, StrtollOOBTest) {
+  RunStrtolOOBTest(&CallStrtoll);
+}
+TEST(AddressSanitizer, StrtolOOBTest) {
+  RunStrtolOOBTest(&CallStrtol);
+}
+
+// At the moment we instrument memcpy/memmove/memset calls at compile time, so
+// we can't detect OOB errors when these functions are called through a pointer;
+// see the disabled MemIntrinsicCallByPointerTest below.
+typedef void*(*PointerToMemTransfer)(void*, const void*, size_t);
+typedef void*(*PointerToMemSet)(void*, int, size_t);
+
+void CallMemSetByPointer(PointerToMemSet MemSet) {
+  size_t size = Ident(100);
+  char *array = Ident((char*)malloc(size));
+  EXPECT_DEATH(MemSet(array, 0, 101), RightOOBErrorMessage(0));
+  free(array);
+}
+
+void CallMemTransferByPointer(PointerToMemTransfer MemTransfer) {
+  size_t size = Ident(100);
+  char *src = Ident((char*)malloc(size));
+  char *dst = Ident((char*)malloc(size));
+  EXPECT_DEATH(MemTransfer(dst, src, 101), RightOOBErrorMessage(0));
+  free(src);
+  free(dst);
+}
+
+TEST(AddressSanitizer, DISABLED_MemIntrinsicCallByPointerTest) {
+  CallMemSetByPointer(&memset);
+  CallMemTransferByPointer(&memcpy);
+  CallMemTransferByPointer(&memmove);
+}
+
+// This test case fails:
+// Clang optimizes memcpy/memset calls, which leads to unaligned accesses.
+TEST(AddressSanitizer, DISABLED_MemIntrinsicUnalignedAccessTest) {
+  int size = Ident(4096);
+  char *s = Ident((char*)malloc(size));
+  EXPECT_DEATH(memset(s + size - 1, 0, 2), RightOOBErrorMessage(0));
+  free(s);
+}
+
+// TODO(samsonov): Add a test with malloc(0)
+// TODO(samsonov): Add tests for str* and mem* functions.
+
+NOINLINE static int LargeFunction(bool do_bad_access) {
+  int *x = new int[100];
+  x[0]++;
+  x[1]++;
+  x[2]++;
+  x[3]++;
+  x[4]++;
+  x[5]++;
+  x[6]++;
+  x[7]++;
+  x[8]++;
+  x[9]++;
+
+  x[do_bad_access ? 100 : 0]++; int res = __LINE__;  // must share one line
+
+  x[10]++;
+  x[11]++;
+  x[12]++;
+  x[13]++;
+  x[14]++;
+  x[15]++;
+  x[16]++;
+  x[17]++;
+  x[18]++;
+  x[19]++;
+
+  delete [] x;  // array form: x came from new int[100]
+  return res;  // source line of the (possibly out-of-bounds) access above
+}
+
+// Test that we have correct debug info for the failing instruction.
+// This test requires the in-process symbolizer to be enabled by default.
+TEST(AddressSanitizer, DISABLED_LargeFunctionSymbolizeTest) {
+  int failing_line = LargeFunction(false);
+  char expected_warning[128];
+  sprintf(expected_warning, "LargeFunction.*asan_test.cc:%d", failing_line);
+  EXPECT_DEATH(LargeFunction(true), expected_warning);
+}
+
+// Check that we unwind and symbolize correctly.
+TEST(AddressSanitizer, DISABLED_MallocFreeUnwindAndSymbolizeTest) {
+  int *a = (int*)malloc_aaa(sizeof(int));
+  *a = 1;
+  free_aaa(a);
+  EXPECT_DEATH(*a = 1, "free_ccc.*free_bbb.*free_aaa.*"
+               "malloc_fff.*malloc_eee.*malloc_ddd");
+}
+
+void *ThreadedTestAlloc(void *a) {
+  int **slot = static_cast<int**>(a);  // where the new int's address is stored
+  slot[0] = new int;
+  return NULL;
+}
+
+void *ThreadedTestFree(void *a) {
+  int **slot = static_cast<int**>(a);  // holds the int* to release
+  delete slot[0];
+  return NULL;
+}
+
+void *ThreadedTestUse(void *a) {
+  int *target = *static_cast<int**>(a);  // load the int* the caller points at
+  *target = 1;
+  return NULL;
+}
+
+void ThreadedTestSpawn() {
+  pthread_t t;
+  int *x;
+  pthread_create(&t, 0, ThreadedTestAlloc, &x);
+  pthread_join(t, 0);
+  pthread_create(&t, 0, ThreadedTestFree, &x);
+  pthread_join(t, 0);
+  pthread_create(&t, 0, ThreadedTestUse, &x);
+  pthread_join(t, 0);
+}
+
+TEST(AddressSanitizer, ThreadedTest) {
+  EXPECT_DEATH(ThreadedTestSpawn(),
+               ASAN_PCRE_DOTALL
+               "Thread T.*created"
+               ".*Thread T.*created"
+               ".*Thread T.*created");
+}
+
+#if ASAN_NEEDS_SEGV
+TEST(AddressSanitizer, ShadowGapTest) {
+#if __WORDSIZE == 32
+  char *addr = (char*)0x22000000;
+#else
+  char *addr = (char*)0x0000100000080000;
+#endif
+  EXPECT_DEATH(*addr = 1, "AddressSanitizer crashed on unknown");
+}
+#endif  // ASAN_NEEDS_SEGV
+
+extern "C" {
+NOINLINE static void UseThenFreeThenUse() {
+  char *x = Ident((char*)malloc(8));
+  *x = 1;
+  free_aaa(x);
+  *x = 2;
+}
+}
+
+TEST(AddressSanitizer, UseThenFreeThenUseTest) {
+  EXPECT_DEATH(UseThenFreeThenUse(), "freed by thread");
+}
+
+TEST(AddressSanitizer, StrDupTest) {
+  free(strdup(Ident("123")));
+}
+
+// Currently we create and poison redzone at right of global variables.
+char glob5[5];
+static char static110[110];
+const char ConstGlob[7] = {1, 2, 3, 4, 5, 6, 7};
+static const char StaticConstGlob[3] = {9, 8, 7};
+extern int GlobalsTest(int x);
+
+TEST(AddressSanitizer, GlobalTest) {
+  static char func_static15[15];
+
+  static char fs1[10];
+  static char fs2[10];
+  static char fs3[10];
+
+  glob5[Ident(0)] = 0;
+  glob5[Ident(1)] = 0;
+  glob5[Ident(2)] = 0;
+  glob5[Ident(3)] = 0;
+  glob5[Ident(4)] = 0;
+
+  EXPECT_DEATH(glob5[Ident(5)] = 0,
+               "0 bytes to the right of global variable.*glob5.* size 5");
+  EXPECT_DEATH(glob5[Ident(5+6)] = 0,
+               "6 bytes to the right of global variable.*glob5.* size 5");
+  Ident(static110);  // avoid optimizations
+  static110[Ident(0)] = 0;
+  static110[Ident(109)] = 0;
+  EXPECT_DEATH(static110[Ident(110)] = 0,
+               "0 bytes to the right of global variable");
+  EXPECT_DEATH(static110[Ident(110+7)] = 0,
+               "7 bytes to the right of global variable");
+
+  Ident(func_static15);  // avoid optimizations
+  func_static15[Ident(0)] = 0;
+  EXPECT_DEATH(func_static15[Ident(15)] = 0,
+               "0 bytes to the right of global variable");
+  EXPECT_DEATH(func_static15[Ident(15 + 9)] = 0,
+               "9 bytes to the right of global variable");
+
+  Ident(fs1);
+  Ident(fs2);
+  Ident(fs3);
+
+  // We don't create left redzones, so this is not 100% guaranteed to fail.
+  // But most likely will.
+  EXPECT_DEATH(fs2[Ident(-1)] = 0, "is located.*of global variable");
+
+  EXPECT_DEATH(Ident(Ident(ConstGlob)[8]),
+               "is located 1 bytes to the right of .*ConstGlob");
+  EXPECT_DEATH(Ident(Ident(StaticConstGlob)[5]),
+               "is located 2 bytes to the right of .*StaticConstGlob");
+
+  // call stuff from another file.
+  GlobalsTest(0);
+}
+
+TEST(AddressSanitizer, GlobalStringConstTest) {
+  static const char *zoo = "FOOBAR123";
+  const char *p = Ident(zoo);
+  EXPECT_DEATH(Ident(p[15]), "is ascii string 'FOOBAR123'");
+}
+
+TEST(AddressSanitizer, FileNameInGlobalReportTest) {
+  static char zoo[10];
+  const char *p = Ident(zoo);
+  // The file name should be present in the report.
+  EXPECT_DEATH(Ident(p[15]), "zoo.*asan_test.cc");
+}
+
+int *ReturnsPointerToALocalObject() {
+  int a = 0;
+  return Ident(&a);
+}
+
+#if ASAN_UAR == 1
+TEST(AddressSanitizer, LocalReferenceReturnTest) {
+  int *(*f)() = Ident(ReturnsPointerToALocalObject);
+  int *p = f();
+  // Call 'f' a few more times, 'p' should still be poisoned.
+  for (int i = 0; i < 32; i++)
+    f();
+  EXPECT_DEATH(*p = 1, "AddressSanitizer stack-use-after-return");
+  EXPECT_DEATH(*p = 1, "is located.*in frame .*ReturnsPointerToALocal");
+}
+#endif
+
+template <int kSize>
+NOINLINE static void FuncWithStack() {
+  char x[kSize];
+  Ident(x)[0] = 0;
+  Ident(x)[kSize-1] = 0;
+}
+
+// Declared as a pthread start routine so threads can run it without a cast.
+static void *LotsOfStackReuse(void *unused) {
+  int LargeStack[10000];
+  Ident(LargeStack)[0] = 0;
+  for (int i = 0; i < 10000; i++) {
+    FuncWithStack<128 * 1>();
+    FuncWithStack<128 * 2>();
+    FuncWithStack<128 * 4>();
+    FuncWithStack<128 * 8>();
+    FuncWithStack<128 * 16>();
+    FuncWithStack<128 * 32>();
+    FuncWithStack<128 * 64>();
+    FuncWithStack<128 * 128>();
+    FuncWithStack<128 * 256>();
+    FuncWithStack<128 * 512>();
+    Ident(LargeStack)[0] = 0;
+  }
+  return 0;
+}
+
+TEST(AddressSanitizer, StressStackReuseTest) {
+  LotsOfStackReuse(0);
+}
+
+TEST(AddressSanitizer, ThreadedStressStackReuseTest) {
+  const int kNumThreads = 20;
+  pthread_t t[kNumThreads];
+  for (int i = 0; i < kNumThreads; i++)
+    pthread_create(&t[i], 0, LotsOfStackReuse, 0);
+  for (int i = 0; i < kNumThreads; i++)
+    pthread_join(t[i], 0);
+}
+
+static void *PthreadExit(void *a) {  // thread body; |a| is not used
+  pthread_exit(0);  // end the thread explicitly instead of returning
+  return 0;  // presumably never reached; satisfies the non-void return type
+}
+
+TEST(AddressSanitizer, PthreadExitTest) {
+  pthread_t t;
+  for (int i = 0; i < 1000; i++) {
+    pthread_create(&t, 0, PthreadExit, 0);
+    pthread_join(t, 0);
+  }
+}
+
+#ifdef __EXCEPTIONS
+NOINLINE static void StackReuseAndException() {
+  int large_stack[1000];
+  Ident(large_stack);
+  ASAN_THROW(1);
+}
+
+// TODO(kcc): support exceptions with use-after-return.
+TEST(AddressSanitizer, DISABLED_StressStackReuseAndExceptionsTest) {
+  for (int i = 0; i < 10000; i++) {
+    try {
+    StackReuseAndException();
+    } catch(...) {
+    }
+  }
+}
+#endif
+
+TEST(AddressSanitizer, MlockTest) {
+  EXPECT_EQ(0, mlockall(MCL_CURRENT));
+  EXPECT_EQ(0, mlock((void*)0x12345, 0x5678));
+  EXPECT_EQ(0, munlockall());
+  EXPECT_EQ(0, munlock((void*)0x987, 0x654));
+}
+
+struct LargeStruct {
+  int foo[100];
+};
+
+// Test for bug http://llvm.org/bugs/show_bug.cgi?id=11763.
+// Struct copy should not cause asan warning even if lhs == rhs.
+TEST(AddressSanitizer, LargeStructCopyTest) {
+  LargeStruct a;
+  *Ident(&a) = *Ident(&a);
+}
+
+__attribute__((no_address_safety_analysis))
+static void NoAddressSafety() {
+  char *foo = new char[10];
+  Ident(foo)[10] = 0;
+  delete [] foo;
+}
+
+TEST(AddressSanitizer, AttributeNoAddressSafetyTest) {
+  Ident(NoAddressSafety)();
+}
+
+// ------------------ demo tests; run each one-by-one -------------
+// e.g. --gtest_filter=*DemoOOBLeftHigh --gtest_also_run_disabled_tests
+TEST(AddressSanitizer, DISABLED_DemoThreadedTest) {
+  ThreadedTestSpawn();
+}
+
+void *SimpleBugOnSTack(void *x = 0) {
+  char a[20];
+  Ident(a)[20] = 0;
+  return 0;
+}
+
+TEST(AddressSanitizer, DISABLED_DemoStackTest) {
+  SimpleBugOnSTack();
+}
+
+TEST(AddressSanitizer, DISABLED_DemoThreadStackTest) {
+  pthread_t t;
+  pthread_create(&t, 0, SimpleBugOnSTack, 0);
+  pthread_join(t, 0);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoUAFLowIn) {
+  uaf_test<U1>(10, 0);
+}
+TEST(AddressSanitizer, DISABLED_DemoUAFLowLeft) {
+  uaf_test<U1>(10, -2);
+}
+TEST(AddressSanitizer, DISABLED_DemoUAFLowRight) {
+  uaf_test<U1>(10, 10);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoUAFHigh) {
+  uaf_test<U1>(kLargeMalloc, 0);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoOOBLeftLow) {
+  oob_test<U1>(10, -1);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoOOBLeftHigh) {
+  oob_test<U1>(kLargeMalloc, -1);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoOOBRightLow) {
+  oob_test<U1>(10, 10);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoOOBRightHigh) {
+  oob_test<U1>(kLargeMalloc, kLargeMalloc);
+}
+
+TEST(AddressSanitizer, DISABLED_DemoOOM) {
+  size_t size = __WORDSIZE == 64 ? (size_t)(1ULL << 40) : (0xf0000000);
+  printf("%p\n", malloc(size));
+}
+
+TEST(AddressSanitizer, DISABLED_DemoDoubleFreeTest) {
+  DoubleFree();
+}
+
+TEST(AddressSanitizer, DISABLED_DemoNullDerefTest) {
+  int *a = 0;
+  Ident(a)[10] = 0;
+}
+
+TEST(AddressSanitizer, DISABLED_DemoFunctionStaticTest) {
+  static char a[100];
+  static char b[100];
+  static char c[100];
+  Ident(a);
+  Ident(b);
+  Ident(c);
+  Ident(a)[5] = 0;
+  Ident(b)[105] = 0;
+  Ident(a)[5] = 0;
+}
+
+TEST(AddressSanitizer, DISABLED_DemoTooMuchMemoryTest) {
+  const size_t kAllocSize = (1 << 28) - 1024;
+  size_t total_size = 0;
+  while (true) {
+    char *x = (char*)malloc(kAllocSize);
+    memset(x, 0, kAllocSize);
+    total_size += kAllocSize;
+    fprintf(stderr, "total: %ldM\n", (long)total_size >> 20);
+  }
+}
+
+// http://code.google.com/p/address-sanitizer/issues/detail?id=66
+TEST(AddressSanitizer, BufferOverflowAfterManyFrees) {
+  for (int i = 0; i < 1000000; i++) {
+    delete [] (Ident(new char [8644]));
+  }
+  char *x = new char[8192];
+  EXPECT_DEATH(x[Ident(8192)] = 0, "AddressSanitizer heap-buffer-overflow");
+  delete [] Ident(x);
+}
+
+#ifdef __APPLE__
+#include "asan_mac_test.h"
+// TODO(glider): figure out whether we still need these tests. Is it correct
+// to intercept CFAllocator?
+TEST(AddressSanitizerMac, DISABLED_CFAllocatorDefaultDoubleFree) {
+  EXPECT_DEATH(
+      CFAllocatorDefaultDoubleFree(),
+      "attempting double-free");
+}
+
+TEST(AddressSanitizerMac, DISABLED_CFAllocatorSystemDefaultDoubleFree) {
+  EXPECT_DEATH(
+      CFAllocatorSystemDefaultDoubleFree(),
+      "attempting double-free");
+}
+
+TEST(AddressSanitizerMac, DISABLED_CFAllocatorMallocDoubleFree) {
+  EXPECT_DEATH(CFAllocatorMallocDoubleFree(), "attempting double-free");
+}
+
+TEST(AddressSanitizerMac, DISABLED_CFAllocatorMallocZoneDoubleFree) {
+  EXPECT_DEATH(CFAllocatorMallocZoneDoubleFree(), "attempting double-free");
+}
+
+TEST(AddressSanitizerMac, GCDDispatchAsync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDDispatchAsync(), "Shadow byte and word");
+}
+
+TEST(AddressSanitizerMac, GCDDispatchSync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDDispatchSync(), "Shadow byte and word");
+}
+
+
+TEST(AddressSanitizerMac, GCDReuseWqthreadsAsync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDReuseWqthreadsAsync(), "Shadow byte and word");
+}
+
+TEST(AddressSanitizerMac, GCDReuseWqthreadsSync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDReuseWqthreadsSync(), "Shadow byte and word");
+}
+
+TEST(AddressSanitizerMac, GCDDispatchAfter) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDDispatchAfter(), "Shadow byte and word");
+}
+
+TEST(AddressSanitizerMac, GCDSourceEvent) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDSourceEvent(), "Shadow byte and word");
+}
+
+TEST(AddressSanitizerMac, GCDSourceCancel) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDSourceCancel(), "Shadow byte and word");
+}
+
+TEST(AddressSanitizerMac, GCDGroupAsync) {
+  // Make sure the whole ASan report is printed, i.e. that we don't die
+  // on a CHECK.
+  EXPECT_DEATH(TestGCDGroupAsync(), "Shadow byte and word");
+}
+
+void *MallocIntrospectionLockWorker(void *_) {
+  const int kNumPointers = 100;
+  int i;
+  void *pointers[kNumPointers];
+  for (i = 0; i < kNumPointers; i++) {
+    pointers[i] = malloc(i + 1);
+  }
+  for (i = 0; i < kNumPointers; i++) {
+    free(pointers[i]);
+  }
+
+  return NULL;
+}
+
+void *MallocIntrospectionLockForker(void *_) {
+  pid_t result = fork();
+  if (result == -1) {
+    perror("fork");
+  }
+  assert(result != -1);
+  if (result == 0) {
+    // Call malloc in the child process to make sure we won't deadlock.
+    void *ptr = malloc(42);
+    free(ptr);
+    exit(0);
+  } else {
+    // Return in the parent process.
+    return NULL;
+  }
+}
+
+TEST(AddressSanitizerMac, MallocIntrospectionLock) {
+  // Incorrect implementation of force_lock and force_unlock in our malloc zone
+  // will cause forked processes to deadlock.
+  // TODO(glider): need to detect that none of the child processes deadlocked.
+  const int kNumWorkers = 5, kNumIterations = 100;
+  int i, iter;
+  for (iter = 0; iter < kNumIterations; iter++) {
+    pthread_t workers[kNumWorkers], forker;
+    for (i = 0; i < kNumWorkers; i++) {
+      pthread_create(&workers[i], 0, MallocIntrospectionLockWorker, 0);
+    }
+    pthread_create(&forker, 0, MallocIntrospectionLockForker, 0);
+    for (i = 0; i < kNumWorkers; i++) {
+      pthread_join(workers[i], 0);
+    }
+    pthread_join(forker, 0);
+  }
+}
+
+void *TSDAllocWorker(void *test_key) {
+  if (test_key) {
+    void *mem = malloc(10);
+    pthread_setspecific(*(pthread_key_t*)test_key, mem);
+  }
+  return NULL;
+}
+
+TEST(AddressSanitizerMac, DISABLED_TSDWorkqueueTest) {
+  pthread_t th;
+  pthread_key_t test_key;
+  pthread_key_create(&test_key, CallFreeOnWorkqueue);
+  pthread_create(&th, NULL, TSDAllocWorker, &test_key);
+  pthread_join(th, NULL);
+  pthread_key_delete(test_key);
+}
+
+// Test that CFStringCreateCopy does not copy constant strings.
+TEST(AddressSanitizerMac, CFStringCreateCopy) {
+  CFStringRef str = CFSTR("Hello world!\n");
+  CFStringRef str2 = CFStringCreateCopy(0, str);
+  EXPECT_EQ(str, str2);
+}
+
+TEST(AddressSanitizerMac, NSObjectOOB) {
+  // Make sure that our allocators are used for NSObjects.
+  EXPECT_DEATH(TestOOBNSObjects(), "heap-buffer-overflow");
+}
+#endif  // __APPLE__
+
+// Test that instrumentation of stack allocations takes into account
+// AllocSize of a type, and not its StoreSize (16 vs 10 bytes for long double).
+// See http://llvm.org/bugs/show_bug.cgi?id=12047 for more details.
+TEST(AddressSanitizer, LongDoubleNegativeTest) {
+  long double a, b;  // stack objects; only the copies matter, not the values
+  static long double c;  // same check with a function-static destination
+  memcpy(Ident(&a), Ident(&b), sizeof(long double));
+  memcpy(Ident(&c), Ident(&b), sizeof(long double));
+}
+
+int main(int argc, char **argv) {
+  progname = argv[0];
+  testing::GTEST_FLAG(death_test_style) = "threadsafe";
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/lib/asan/tests/asan_test.ignore b/lib/asan/tests/asan_test.ignore
new file mode 100644
index 0000000..7bafa83
--- /dev/null
+++ b/lib/asan/tests/asan_test.ignore
@@ -0,0 +1,2 @@
+fun:*IgnoreTest*
+fun:*SomeOtherFunc*
diff --git a/lib/asan/tests/asan_test_config.h b/lib/asan/tests/asan_test_config.h
new file mode 100644
index 0000000..6cf0e69
--- /dev/null
+++ b/lib/asan/tests/asan_test_config.h
@@ -0,0 +1,48 @@
+//===-- asan_test_config.h ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_TEST_CONFIG_H
+#define ASAN_TEST_CONFIG_H
+
+#include <vector>
+#include <string>
+#include <map>
+
+#include "gtest/gtest.h"
+
+using std::string;
+using std::vector;
+using std::map;
+
+#ifndef ASAN_UAR
+# error "please define ASAN_UAR"
+#endif
+
+#ifndef ASAN_HAS_EXCEPTIONS
+# error "please define ASAN_HAS_EXCEPTIONS"
+#endif
+
+#ifndef ASAN_HAS_BLACKLIST
+# error "please define ASAN_HAS_BLACKLIST"
+#endif
+
+#ifndef ASAN_NEEDS_SEGV
+# error "please define ASAN_NEEDS_SEGV"
+#endif
+
+#ifndef ASAN_LOW_MEMORY
+#define ASAN_LOW_MEMORY 0
+#endif
+
+#define ASAN_PCRE_DOTALL ""
+
+#endif  // ASAN_TEST_CONFIG_H
diff --git a/lib/asan/tests/asan_test_utils.h b/lib/asan/tests/asan_test_utils.h
new file mode 100644
index 0000000..fb509cc
--- /dev/null
+++ b/lib/asan/tests/asan_test_utils.h
@@ -0,0 +1,56 @@
+//===-- asan_test_utils.h ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASAN_TEST_UTILS_H
+#define ASAN_TEST_UTILS_H
+
+#if defined(_WIN32)
+typedef unsigned __int8  uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+typedef __int8           int8_t;
+typedef __int16          int16_t;
+typedef __int32          int32_t;
+typedef __int64          int64_t;
+# define NOINLINE __declspec(noinline)
+#else  // defined(_WIN32)
+# define NOINLINE __attribute__((noinline))
+#endif  // defined(_WIN32)
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+
+#ifndef __WORDSIZE
+#if __LP64__ || defined(_WIN64)
+#define __WORDSIZE 64
+#else
+#define __WORDSIZE 32
+#endif
+#endif
+
+// Make the compiler think that something is going on there.
+extern "C" void break_optimization(void *);
+
+// Returns a copy of its parameter. The copy is first handed to the external
+// break_optimization(), so the compiler can not prove what value is returned.
+template<class T>
+NOINLINE
+static T Ident(T t) {
+  T ret = t;
+  break_optimization(&ret);
+  return ret;
+}
+
+#endif  // ASAN_TEST_UTILS_H
diff --git a/lib/ashldi3.c b/lib/ashldi3.c
index adce4e2..6c558fe 100644
--- a/lib/ashldi3.c
+++ b/lib/ashldi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/ashrdi3.c b/lib/ashrdi3.c
index 03692a3..38ab716 100644
--- a/lib/ashrdi3.c
+++ b/lib/ashrdi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/assembly.h b/lib/assembly.h
index 0ce83ac..83bed12 100644
--- a/lib/assembly.h
+++ b/lib/assembly.h
@@ -35,15 +35,16 @@
 #define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
 
 #ifdef VISIBILITY_HIDDEN
-#define DEFINE_COMPILERRT_FUNCTION(name)                   \
-  .globl SYMBOL_NAME(name) SEPARATOR                       \
-  HIDDEN_DIRECTIVE SYMBOL_NAME(name) SEPARATOR             \
-  SYMBOL_NAME(name):
+#define DECLARE_SYMBOL_VISIBILITY(name)                    \
+  HIDDEN_DIRECTIVE SYMBOL_NAME(name) SEPARATOR
 #else
+#define DECLARE_SYMBOL_VISIBILITY(name)
+#endif
+
 #define DEFINE_COMPILERRT_FUNCTION(name)                   \
   .globl SYMBOL_NAME(name) SEPARATOR                       \
+  DECLARE_SYMBOL_VISIBILITY(name)                          \
   SYMBOL_NAME(name):
-#endif
 
 #define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name)           \
   .globl SYMBOL_NAME(name) SEPARATOR                       \
diff --git a/lib/atomic.c b/lib/atomic.c
new file mode 100644
index 0000000..13dfff4
--- /dev/null
+++ b/lib/atomic.c
@@ -0,0 +1,315 @@
+/*===-- atomic.c - Implement support functions for atomic operations.------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ *  atomic.c defines a set of functions for performing atomic accesses on
+ *  arbitrary-sized memory locations.  This design uses locks that should
+ *  be fast in the uncontended case, for two reasons:
+ * 
+ *  1) This code must work with C programs that do not link to anything
+ *     (including pthreads) and so it should not depend on any pthread
+ *     functions.
+ *  2) Atomic operations, rather than explicit mutexes, are most commonly used
+ *     on code where contended operations are rare.
+ * 
+ *  To avoid needing a per-object lock, this code allocates an array of
+ *  locks and hashes the object pointers to find the one that it should use.
+ *  For operations that must be atomic on two locations, the lower lock is
+ *  always acquired first, to avoid deadlock.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+// Clang objects if you redefine a builtin.  This little hack allows us to
+// define a function with the same name as an intrinsic.
+#pragma redefine_extname __atomic_load_n __atomic_load
+#pragma redefine_extname __atomic_store_n __atomic_store
+#pragma redefine_extname __atomic_exchange_n __atomic_exchange
+#pragma redefine_extname __atomic_compare_exchange_n __atomic_compare_exchange
+
+/// Number of locks.  This allocates one page on 32-bit platforms, two on
+/// 64-bit.  This can be specified externally if a different trade-off between
+/// memory usage and contention probability is required for a given platform.
+#ifndef SPINLOCK_COUNT
+#define SPINLOCK_COUNT (1<<10)
+#endif
+static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
+
+////////////////////////////////////////////////////////////////////////////////
+// Platform-specific lock implementation.  Falls back to spinlocks if none is
+// defined.  Each platform should define the Lock type, and corresponding
+// lock() and unlock() functions.
+////////////////////////////////////////////////////////////////////////////////
+#ifdef __FreeBSD__
+#include <errno.h>
+#include <sys/types.h>
+#include <machine/atomic.h>
+#include <sys/umtx.h>
+typedef struct _usem Lock;
+inline static void unlock(Lock *l) {
+  __atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE);
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  if (l->_has_waiters)
+      _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0);
+}
+inline static void lock(Lock *l) {
+  uint32_t old = 1;
+  while (!__atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old,
+        0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
+    _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0);
+    old = 1;
+  }
+}
+/// locks for atomic operations
+static Lock locks[SPINLOCK_COUNT] = { [0 ...  SPINLOCK_COUNT-1] = {0,1,0} };
+#else
+typedef _Atomic(uintptr_t) Lock;
+/// Unlock a lock.  This is a release operation.
+inline static void unlock(Lock *l) {
+  __atomic_store(l, 0, __ATOMIC_RELEASE);
+}
+/// Locks a lock.  In the current implementation, this is potentially
+/// unbounded in the contended case.
+inline static void lock(Lock *l) {
+  uintptr_t old = 0;
+  while (!__atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE,
+        __ATOMIC_RELAXED))
+    old = 0;
+}
+/// locks for atomic operations
+static Lock locks[SPINLOCK_COUNT];
+#endif
+
+
+/// Returns the lock to use for a given pointer: the address is hashed into
+/// an index of the `locks` array.
+static inline Lock *lock_for_pointer(void *ptr) {
+  // Disregard the lowest 4 bits.  All addresses that may be part of the same
+  // memory operation should hash to the same value and therefore share a
+  // lock.
+  const intptr_t shifted = (intptr_t)ptr >> 4;
+  // The next bits are the basis for the hash.
+  const intptr_t low = shifted & SPINLOCK_MASK;
+  // A high(er) set of bits perturbs the hash, so that atomic fields in a
+  // single object do not collide.
+  const intptr_t mixed = (shifted >> 16) ^ low;
+  // Mask the result down to a valid index into the lock table and hand back
+  // a pointer to that entry.
+  return &locks[mixed & SPINLOCK_MASK];
+}
+
+/// Macros for determining whether a size is lock free.  Clang can not yet
+/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are
+/// not lock free.
+#define IS_LOCK_FREE_1 __atomic_is_lock_free(1)
+#define IS_LOCK_FREE_2 __atomic_is_lock_free(2)
+#define IS_LOCK_FREE_4 __atomic_is_lock_free(4)
+#define IS_LOCK_FREE_8 __atomic_is_lock_free(8)
+#define IS_LOCK_FREE_16 0
+
+/// Macro that calls the compiler-generated lock-free versions of functions
+/// when they exist.  Needs `size` and LOCK_FREE_ACTION(type) in scope; the
+/// action must return.  Each case breaks, so a size that is not lock free
+/// reaches the caller's locked path rather than a wider type's action.
+#define LOCK_FREE_CASES() \
+  do {\
+  switch (size) {\
+    case 1:\
+      if (IS_LOCK_FREE_1) { LOCK_FREE_ACTION(uint8_t); }\
+      break;\
+    case 2:\
+      if (IS_LOCK_FREE_2) { LOCK_FREE_ACTION(uint16_t); }\
+      break;\
+    case 4:\
+      if (IS_LOCK_FREE_4) { LOCK_FREE_ACTION(uint32_t); }\
+      break;\
+    case 8:\
+      if (IS_LOCK_FREE_8) { LOCK_FREE_ACTION(uint64_t); }\
+      break;\
+    case 16:\
+      /* FIXME: no __uint128_t on 32 bit platforms; always use the lock. */\
+      break;\
+  }\
+  } while (0)
+
+
+/// An atomic load operation.  This is atomic with respect to the source
+/// pointer only.
+void __atomic_load_n(int size, void *src, void *dest, int model) {
+#define LOCK_FREE_ACTION(type) \
+    *((type*)dest) = __atomic_load((_Atomic(type)*)src, model);\
+    return;
+  LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+  Lock *l = lock_for_pointer(src);
+  lock(l);
+  memcpy(dest, src, size);
+  unlock(l);
+}
+
+/// An atomic store operation.  This is atomic with respect to the destination
+/// pointer only.  Copies `size` bytes from `src` to `dest`.
+void __atomic_store_n(int size, void *dest, void *src, int model) {
+#define LOCK_FREE_ACTION(type) \
+    __atomic_store((_Atomic(type)*)dest, *(type*)src, model);\
+    return;
+  LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+  Lock *l = lock_for_pointer(dest);
+  lock(l);
+  memcpy(dest, src, size);
+  unlock(l);
+}
+
+/// Atomic compare and exchange operation.  When the `size` bytes at *ptr are
+/// identical to those at *expected, the bytes at *desired are copied to *ptr.
+/// Otherwise the current contents of *ptr are copied into *expected.
+///
+/// Returns 1 if the exchange takes place or 0 if it fails.
+int __atomic_compare_exchange_n(int size, void *ptr, void *expected,
+    void *desired, int success, int failure) {
+#define LOCK_FREE_ACTION(type) \
+  return __atomic_compare_exchange_strong((_Atomic(type)*)ptr, (type*)expected,\
+      *(type*)desired, success, failure)
+  LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+  // Locked fallback: compare and copy byte-wise while holding the lock.
+  Lock *guard = lock_for_pointer(ptr);
+  lock(guard);
+  const int exchanged = (memcmp(ptr, expected, size) == 0);
+  if (exchanged)
+    memcpy(ptr, desired, size);
+  else
+    memcpy(expected, ptr, size);
+  unlock(guard);
+  return exchanged;
+}
+
+/// Performs an atomic exchange operation between two pointers.  This is atomic
+/// with respect to the target address.
+void __atomic_exchange_n(int size, void *ptr, void *val, void *old, int model) {
+#define LOCK_FREE_ACTION(type) \
+    *(type*)old = __atomic_exchange((_Atomic(type)*)ptr, *(type*)val,\
+        model);\
+    return;
+  LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+  Lock *l = lock_for_pointer(ptr);
+  lock(l);
+  memcpy(old, ptr, size);
+  memcpy(ptr, val, size);
+  unlock(l);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Where the size is known at compile time, the compiler may emit calls to
+// specialised versions of the above functions.
+////////////////////////////////////////////////////////////////////////////////
+#define OPTIMISED_CASES\
+  OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
+  OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
+  OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
+  OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\
+  /* FIXME: __uint128_t isn't available on 32 bit platforms.
+  OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+type __atomic_load_##n(type *src, int model) {\
+  if (lockfree)\
+    return __atomic_load((_Atomic(type)*)src, model);\
+  Lock *l = lock_for_pointer(src);\
+  lock(l);\
+  type val = *src;\
+  unlock(l);\
+  return val;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+void  __atomic_store_##n(type *dest, type val, int model) {\
+  if (lockfree) {\
+    __atomic_store((_Atomic(type)*)dest, val, model);\
+    return;\
+  }\
+  Lock *l = lock_for_pointer(dest);\
+  lock(l);\
+  *dest = val;\
+  unlock(l);\
+  return;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+type __atomic_exchange_##n(type *dest, type val, int model) {\
+  if (lockfree)\
+    return __atomic_exchange((_Atomic(type)*)dest, val, model);\
+  Lock *l = lock_for_pointer(dest);\
+  lock(l);\
+  type tmp = *dest;\
+  *dest = val;\
+  unlock(l);\
+  return tmp;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,\
+    int success, int failure) {\
+  if (lockfree)\
+    return __atomic_compare_exchange_strong((_Atomic(type)*)ptr, expected, desired,\
+        success, failure);\
+  Lock *l = lock_for_pointer(ptr);\
+  lock(l);\
+  if (*ptr == *expected) {\
+    *ptr = desired;\
+    unlock(l);\
+    return 1;\
+  }\
+  *expected = *ptr;\
+  unlock(l);\
+  return 0;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+////////////////////////////////////////////////////////////////////////////////
+// Atomic read-modify-write operations for integers of various sizes.
+////////////////////////////////////////////////////////////////////////////////
+#define ATOMIC_RMW(n, lockfree, type, opname, op) \
+type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {\
+  if (lockfree) \
+    return __atomic_fetch_##opname((_Atomic(type)*)ptr, val, model);\
+  Lock *l = lock_for_pointer(ptr);\
+  lock(l);\
+  type tmp = *ptr;\
+  *ptr = tmp op val;\
+  unlock(l);\
+  return tmp;\
+}
+
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
diff --git a/lib/clear_cache.c b/lib/clear_cache.c
index 099b76e..b934fd4 100644
--- a/lib/clear_cache.c
+++ b/lib/clear_cache.c
@@ -8,7 +8,7 @@
  * ===----------------------------------------------------------------------===
  */
 
-#include <stdlib.h>
+#include "int_lib.h"
 
 #if __APPLE__
   #include <libkern/OSCacheControl.h>
diff --git a/lib/clzdi2.c b/lib/clzdi2.c
index c281945..b9e64da 100644
--- a/lib/clzdi2.c
+++ b/lib/clzdi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/clzsi2.c b/lib/clzsi2.c
index d0a6aea..25b8ed2 100644
--- a/lib/clzsi2.c
+++ b/lib/clzsi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/cmpdi2.c b/lib/cmpdi2.c
index 999c3d2..c2b1f69 100644
--- a/lib/cmpdi2.c
+++ b/lib/cmpdi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/ctzdi2.c b/lib/ctzdi2.c
index b3d37d0..db3c6fd 100644
--- a/lib/ctzdi2.c
+++ b/lib/ctzdi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/ctzsi2.c b/lib/ctzsi2.c
index 2ff0e5d..c69486e 100644
--- a/lib/ctzsi2.c
+++ b/lib/ctzsi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/divdc3.c b/lib/divdc3.c
index 5f63298..cfbc498 100644
--- a/lib/divdc3.c
+++ b/lib/divdc3.c
@@ -13,8 +13,7 @@
  */
 
 #include "int_lib.h"
-#include <math.h>
-#include <complex.h>
+#include "int_math.h"
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
@@ -22,35 +21,37 @@
 __divdc3(double __a, double __b, double __c, double __d)
 {
     int __ilogbw = 0;
-    double __logbw = logb(fmax(fabs(__c), fabs(__d)));
-    if (isfinite(__logbw))
+    double __logbw = crt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d)));
+    if (crt_isfinite(__logbw))
     {
         __ilogbw = (int)__logbw;
-        __c = scalbn(__c, -__ilogbw);
-        __d = scalbn(__d, -__ilogbw);
+        __c = crt_scalbn(__c, -__ilogbw);
+        __d = crt_scalbn(__d, -__ilogbw);
     }
     double __denom = __c * __c + __d * __d;
     double _Complex z;
-    __real__ z = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (isnan(__real__ z) && isnan(__imag__ z))
+    __real__ z = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+    __imag__ z = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
     {
-        if ((__denom == 0.0) && (!isnan(__a) || !isnan(__b)))
+        if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = copysign(INFINITY, __c) * __a;
-            __imag__ z = copysign(INFINITY, __c) * __b;
+            __real__ z = crt_copysign(CRT_INFINITY, __c) * __a;
+            __imag__ z = crt_copysign(CRT_INFINITY, __c) * __b;
         }
-        else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d))
+        else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+                 crt_isfinite(__c) && crt_isfinite(__d))
         {
-            __a = copysign(isinf(__a) ? 1.0 : 0.0, __a);
-            __b = copysign(isinf(__b) ? 1.0 : 0.0, __b);
-            __real__ z = INFINITY * (__a * __c + __b * __d);
-            __imag__ z = INFINITY * (__b * __c - __a * __d);
+            __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a);
+            __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b);
+            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
+            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
         }
-        else if (isinf(__logbw) && __logbw > 0.0 && isfinite(__a) && isfinite(__b))
+        else if (crt_isinf(__logbw) && __logbw > 0.0 &&
+                 crt_isfinite(__a) && crt_isfinite(__b))
         {
-            __c = copysign(isinf(__c) ? 1.0 : 0.0, __c);
-            __d = copysign(isinf(__d) ? 1.0 : 0.0, __d);
+            __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c);
+            __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d);
             __real__ z = 0.0 * (__a * __c + __b * __d);
             __imag__ z = 0.0 * (__b * __c - __a * __d);
         }
diff --git a/lib/divdf3.c b/lib/divdf3.c
index 925abd5..cc034dd 100644
--- a/lib/divdf3.c
+++ b/lib/divdf3.c
@@ -15,7 +15,6 @@
 // underflow with correct rounding.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/divdi3.c b/lib/divdi3.c
index d62df56..2c2bcc2 100644
--- a/lib/divdi3.c
+++ b/lib/divdi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/divmoddi4.c b/lib/divmoddi4.c
index d3ca745..2fe2b48 100644
--- a/lib/divmoddi4.c
+++ b/lib/divmoddi4.c
@@ -11,14 +11,11 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
 extern COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
 
-ARM_EABI_FNALIAS(ldivmod, divmoddi4);
-
 /* Returns: a / b, *rem = a % b  */
 
 COMPILER_RT_ABI di_int
diff --git a/lib/divmodsi4.c b/lib/divmodsi4.c
index 4dc1978..c7f7b1a 100644
--- a/lib/divmodsi4.c
+++ b/lib/divmodsi4.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/divsc3.c b/lib/divsc3.c
index a05f429..caa0c40 100644
--- a/lib/divsc3.c
+++ b/lib/divsc3.c
@@ -13,8 +13,7 @@
  */
 
 #include "int_lib.h"
-#include <math.h>
-#include <complex.h>
+#include "int_math.h"
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
@@ -22,35 +21,37 @@
 __divsc3(float __a, float __b, float __c, float __d)
 {
     int __ilogbw = 0;
-    float __logbw = logbf(fmaxf(fabsf(__c), fabsf(__d)));
-    if (isfinite(__logbw))
+    float __logbw = crt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d)));
+    if (crt_isfinite(__logbw))
     {
         __ilogbw = (int)__logbw;
-        __c = scalbnf(__c, -__ilogbw);
-        __d = scalbnf(__d, -__ilogbw);
+        __c = crt_scalbnf(__c, -__ilogbw);
+        __d = crt_scalbnf(__d, -__ilogbw);
     }
     float __denom = __c * __c + __d * __d;
     float _Complex z;
-    __real__ z = scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (isnan(__real__ z) && isnan(__imag__ z))
+    __real__ z = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
+    __imag__ z = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
     {
-        if ((__denom == 0) && (!isnan(__a) || !isnan(__b)))
+        if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = copysignf(INFINITY, __c) * __a;
-            __imag__ z = copysignf(INFINITY, __c) * __b;
+            __real__ z = crt_copysignf(CRT_INFINITY, __c) * __a;
+            __imag__ z = crt_copysignf(CRT_INFINITY, __c) * __b;
         }
-        else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d))
+        else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+                 crt_isfinite(__c) && crt_isfinite(__d))
         {
-            __a = copysignf(isinf(__a) ? 1 : 0, __a);
-            __b = copysignf(isinf(__b) ? 1 : 0, __b);
-            __real__ z = INFINITY * (__a * __c + __b * __d);
-            __imag__ z = INFINITY * (__b * __c - __a * __d);
+            __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
+            __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
+            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
+            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
         }
-        else if (isinf(__logbw) && __logbw > 0 && isfinite(__a) && isfinite(__b))
+        else if (crt_isinf(__logbw) && __logbw > 0 &&
+                 crt_isfinite(__a) && crt_isfinite(__b))
         {
-            __c = copysignf(isinf(__c) ? 1 : 0, __c);
-            __d = copysignf(isinf(__d) ? 1 : 0, __d);
+            __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
+            __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
             __real__ z = 0 * (__a * __c + __b * __d);
             __imag__ z = 0 * (__b * __c - __a * __d);
         }
diff --git a/lib/divsf3.c b/lib/divsf3.c
index b733307..a8230e4 100644
--- a/lib/divsf3.c
+++ b/lib/divsf3.c
@@ -15,7 +15,6 @@
 // underflow with correct rounding.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define SINGLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/divsi3.c b/lib/divsi3.c
index 01ef274..0d81cb8 100644
--- a/lib/divsi3.c
+++ b/lib/divsi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/divxc3.c b/lib/divxc3.c
index f054d40..5f240e9 100644
--- a/lib/divxc3.c
+++ b/lib/divxc3.c
@@ -14,8 +14,7 @@
 #if !_ARCH_PPC
 
 #include "int_lib.h"
-#include <math.h>
-#include <complex.h>
+#include "int_math.h"
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
@@ -23,35 +22,37 @@
 __divxc3(long double __a, long double __b, long double __c, long double __d)
 {
     int __ilogbw = 0;
-    long double __logbw = logbl(fmaxl(fabsl(__c), fabsl(__d)));
-    if (isfinite(__logbw))
+    long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
+    if (crt_isfinite(__logbw))
     {
         __ilogbw = (int)__logbw;
-        __c = scalbnl(__c, -__ilogbw);
-        __d = scalbnl(__d, -__ilogbw);
+        __c = crt_scalbnl(__c, -__ilogbw);
+        __d = crt_scalbnl(__d, -__ilogbw);
     }
     long double __denom = __c * __c + __d * __d;
     long double _Complex z;
-    __real__ z = scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (isnan(__real__ z) && isnan(__imag__ z))
+    __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+    __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
     {
-        if ((__denom == 0) && (!isnan(__a) || !isnan(__b)))
+        if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = copysignl(INFINITY, __c) * __a;
-            __imag__ z = copysignl(INFINITY, __c) * __b;
+            __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
+            __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
         }
-        else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d))
+        else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+                 crt_isfinite(__c) && crt_isfinite(__d))
         {
-            __a = copysignl(isinf(__a) ? 1 : 0, __a);
-            __b = copysignl(isinf(__b) ? 1 : 0, __b);
-            __real__ z = INFINITY * (__a * __c + __b * __d);
-            __imag__ z = INFINITY * (__b * __c - __a * __d);
+            __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
+            __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
+            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
+            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
         }
-        else if (isinf(__logbw) && __logbw > 0 && isfinite(__a) && isfinite(__b))
+        else if (crt_isinf(__logbw) && __logbw > 0 &&
+                 crt_isfinite(__a) && crt_isfinite(__b))
         {
-            __c = copysignl(isinf(__c) ? 1 : 0, __c);
-            __d = copysignl(isinf(__d) ? 1 : 0, __d);
+            __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
+            __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
             __real__ z = 0 * (__a * __c + __b * __d);
             __imag__ z = 0 * (__b * __c - __a * __d);
         }
diff --git a/lib/enable_execute_stack.c b/lib/enable_execute_stack.c
index 7ab8e8d..278ca24 100644
--- a/lib/enable_execute_stack.c
+++ b/lib/enable_execute_stack.c
@@ -8,7 +8,8 @@
  * ===----------------------------------------------------------------------===
  */
 
-#include <stdint.h>
+#include "int_lib.h"
+
 #include <sys/mman.h>
 
 /* #include "config.h"
diff --git a/lib/eprintf.c b/lib/eprintf.c
index 7c79174..b07d624 100644
--- a/lib/eprintf.c
+++ b/lib/eprintf.c
@@ -12,7 +12,6 @@
 
 #include "int_lib.h"
 #include <stdio.h>
-#include <stdlib.h>
 
 
 /*
diff --git a/lib/extendsfdf2.c b/lib/extendsfdf2.c
index c0b628d..9466de7 100644
--- a/lib/extendsfdf2.c
+++ b/lib/extendsfdf2.c
@@ -38,10 +38,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <stdint.h>
-#include <limits.h>
-
-#include "abi.h"
+#include "int_lib.h"
 
 typedef float src_t;
 typedef uint32_t src_rep_t;
diff --git a/lib/ffsdi2.c b/lib/ffsdi2.c
index 89f1b7b..a5ac990 100644
--- a/lib/ffsdi2.c
+++ b/lib/ffsdi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/fixdfdi.c b/lib/fixdfdi.c
index 85a456d..c6732db 100644
--- a/lib/fixdfdi.c
+++ b/lib/fixdfdi.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/fixdfsi.c b/lib/fixdfsi.c
index fbcf147..3d4379e 100644
--- a/lib/fixdfsi.c
+++ b/lib/fixdfsi.c
@@ -12,7 +12,6 @@
 // conversion is undefined for out of range values in the C standard.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/fixsfdi.c b/lib/fixsfdi.c
index d80e33e..81ceab0 100644
--- a/lib/fixsfdi.c
+++ b/lib/fixsfdi.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/fixsfsi.c b/lib/fixsfsi.c
index 67749a5..f6de609 100644
--- a/lib/fixsfsi.c
+++ b/lib/fixsfsi.c
@@ -12,7 +12,6 @@
 // conversion is undefined for out of range values in the C standard.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define SINGLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/fixunsdfdi.c b/lib/fixunsdfdi.c
index d80b84a..c0ff160 100644
--- a/lib/fixunsdfdi.c
+++ b/lib/fixunsdfdi.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/fixunsdfsi.c b/lib/fixunsdfsi.c
index ecdfb5d..2ce4999 100644
--- a/lib/fixunsdfsi.c
+++ b/lib/fixunsdfsi.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/fixunssfdi.c b/lib/fixunssfdi.c
index 1506420..09078db 100644
--- a/lib/fixunssfdi.c
+++ b/lib/fixunssfdi.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 /* Returns: convert a to a unsigned long long, rounding toward zero.
diff --git a/lib/fixunssfsi.c b/lib/fixunssfsi.c
index dbaa511..d80ed18 100644
--- a/lib/fixunssfsi.c
+++ b/lib/fixunssfsi.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/floatdidf.c b/lib/floatdidf.c
index 5ba9526..2af9e10 100644
--- a/lib/floatdidf.c
+++ b/lib/floatdidf.c
@@ -11,10 +11,8 @@
  *
  *===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
-#include <float.h>
 
 /* Returns: convert a to a double, rounding toward even. */
 
@@ -30,7 +28,6 @@
 /* Support for systems that have hardware floating-point; we'll set the inexact flag
  * as a side-effect of this computation.
  */
-#include <stdint.h>
 
 COMPILER_RT_ABI double
 __floatdidf(di_int a)
diff --git a/lib/floatdisf.c b/lib/floatdisf.c
index 4dc13ca..6607307 100644
--- a/lib/floatdisf.c
+++ b/lib/floatdisf.c
@@ -12,9 +12,6 @@
  *===----------------------------------------------------------------------===
  */
 
-#include "abi.h"
-#include <float.h>
-
 /* Returns: convert a to a float, rounding toward even.*/
 
 /* Assumption: float is a IEEE 32 bit floating point type 
diff --git a/lib/floatsidf.c b/lib/floatsidf.c
index 7227335..74cb66b 100644
--- a/lib/floatsidf.c
+++ b/lib/floatsidf.c
@@ -12,7 +12,6 @@
 // mode.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/floatsisf.c b/lib/floatsisf.c
index e5250ff..a981391 100644
--- a/lib/floatsisf.c
+++ b/lib/floatsisf.c
@@ -12,7 +12,6 @@
 // mode.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define SINGLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/floattidf.c b/lib/floattidf.c
index f61844d..3cafea8 100644
--- a/lib/floattidf.c
+++ b/lib/floattidf.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <float.h>
 
 /* Returns: convert a to a double, rounding toward even.*/
 
diff --git a/lib/floattisf.c b/lib/floattisf.c
index 7eb761d..ab33e4a 100644
--- a/lib/floattisf.c
+++ b/lib/floattisf.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <float.h>
 
 /* Returns: convert a to a float, rounding toward even. */
 
diff --git a/lib/floattixf.c b/lib/floattixf.c
index e4bcb5f..852acc7 100644
--- a/lib/floattixf.c
+++ b/lib/floattixf.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <float.h>
 
 /* Returns: convert a to a long double, rounding toward even. */
 
diff --git a/lib/floatundidf.c b/lib/floatundidf.c
index e74e9d8..6791701 100644
--- a/lib/floatundidf.c
+++ b/lib/floatundidf.c
@@ -12,9 +12,6 @@
  * ===----------------------------------------------------------------------===
  */
 
-#include "abi.h"
-#include <float.h>
-
 /* Returns: convert a to a double, rounding toward even. */
 
 /* Assumption: double is a IEEE 64 bit floating point type 
@@ -32,7 +29,6 @@
  * as a side-effect of this computation.
  */
 
-#include <stdint.h>
 
 COMPILER_RT_ABI double
 __floatundidf(du_int a)
diff --git a/lib/floatundisf.c b/lib/floatundisf.c
index eea45a7..1bf5fbb 100644
--- a/lib/floatundisf.c
+++ b/lib/floatundisf.c
@@ -12,9 +12,6 @@
  *===----------------------------------------------------------------------===
  */
 
-#include "abi.h"
-#include <float.h>
-
 /* Returns: convert a to a float, rounding toward even. */
 
 /* Assumption: float is a IEEE 32 bit floating point type 
diff --git a/lib/floatunsidf.c b/lib/floatunsidf.c
index 3756299..0722248 100644
--- a/lib/floatunsidf.c
+++ b/lib/floatunsidf.c
@@ -12,7 +12,6 @@
 // mode.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/floatunsisf.c b/lib/floatunsisf.c
index 14ef103..3dc1cd4 100644
--- a/lib/floatunsisf.c
+++ b/lib/floatunsisf.c
@@ -12,7 +12,6 @@
 // mode.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define SINGLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/floatuntidf.c b/lib/floatuntidf.c
index bab7483..d0889a0 100644
--- a/lib/floatuntidf.c
+++ b/lib/floatuntidf.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <float.h>
 
 /* Returns: convert a to a double, rounding toward even. */
 
diff --git a/lib/floatuntisf.c b/lib/floatuntisf.c
index 0ab6d7e..f552758 100644
--- a/lib/floatuntisf.c
+++ b/lib/floatuntisf.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <float.h>
 
 /* Returns: convert a to a float, rounding toward even. */
 
diff --git a/lib/floatuntixf.c b/lib/floatuntixf.c
index a84709a..00c07d8 100644
--- a/lib/floatuntixf.c
+++ b/lib/floatuntixf.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <float.h>
 
 /* Returns: convert a to a long double, rounding toward even. */
 
diff --git a/lib/fp_lib.h b/lib/fp_lib.h
index 6c9455a..de5f17f 100644
--- a/lib/fp_lib.h
+++ b/lib/fp_lib.h
@@ -24,6 +24,7 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <limits.h>
+#include "int_lib.h"
 
 #if defined SINGLE_PRECISION
 
diff --git a/lib/gcc_personality_v0.c b/lib/gcc_personality_v0.c
index c840eef..8a708ca 100644
--- a/lib/gcc_personality_v0.c
+++ b/lib/gcc_personality_v0.c
@@ -9,10 +9,6 @@
  *
  */
 
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
 #include "int_lib.h"
 
 /*
@@ -202,7 +198,7 @@
         
     /* There is nothing to do if there is no LSDA for this frame. */
     const uint8_t* lsda = _Unwind_GetLanguageSpecificData(context);
-    if ( lsda == NULL )
+    if ( lsda == (uint8_t*) 0 )
         return _URC_CONTINUE_UNWIND;
 
     uintptr_t pc = _Unwind_GetIP(context)-1;
diff --git a/lib/i386/CMakeLists.txt b/lib/i386/CMakeLists.txt
deleted file mode 100644
index 1c2861a..0000000
--- a/lib/i386/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-SET( SRCS
- 
- )
diff --git a/lib/i386/Makefile.mk b/lib/i386/Makefile.mk
index 20f95e6..1f5c680 100644
--- a/lib/i386/Makefile.mk
+++ b/lib/i386/Makefile.mk
@@ -7,6 +7,7 @@
 #
 #===------------------------------------------------------------------------===#
 
+ModuleName := builtins
 SubDirs := 
 OnlyArchs := i386
 
diff --git a/lib/endianness.h b/lib/int_endianness.h
similarity index 86%
rename from lib/endianness.h
rename to lib/int_endianness.h
index 9b7e7c1..70bd177 100644
--- a/lib/endianness.h
+++ b/lib/int_endianness.h
@@ -1,4 +1,4 @@
-/* ===-- endianness.h - configuration header for compiler-rt ---------------===
+/* ===-- int_endianness.h - configuration header for compiler-rt ------------===
  *
  *		       The LLVM Compiler Infrastructure
  *
@@ -13,13 +13,8 @@
  * ===----------------------------------------------------------------------===
  */
 
-#ifndef ENDIANNESS_H
-#define ENDIANNESS_H
-
-/*
- * Known limitations:
- *   Middle endian systems are not handled currently.
- */
+#ifndef INT_ENDIANNESS_H
+#define INT_ENDIANNESS_H
 
 #if defined(__SVR4) && defined(__sun)
 #include <sys/byteorder.h>
@@ -36,7 +31,7 @@
 
 /* .. */
 
-#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__minix)
 #include <sys/endian.h>
 
 #if _BYTE_ORDER == _BIG_ENDIAN
@@ -85,10 +80,17 @@
 
 #endif /* GNU/Linux */
 
+#if defined(_WIN32)
+
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN    0
+
+#endif /* Windows */
+
 /* . */
 
 #if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN)
 #error Unable to determine endian
 #endif /* Check we found an endianness correctly. */
 
-#endif /* ENDIANNESS_H */
+#endif /* INT_ENDIANNESS_H */
diff --git a/lib/int_lib.h b/lib/int_lib.h
index e1fd6b7..a87426c 100644
--- a/lib/int_lib.h
+++ b/lib/int_lib.h
@@ -16,141 +16,31 @@
 #ifndef INT_LIB_H
 #define INT_LIB_H
 
-/* Assumption:  signed integral is 2's complement */
-/* Assumption:  right shift of signed negative is arithmetic shift */
+/* Assumption: Signed integral is 2's complement. */
+/* Assumption: Right shift of signed negative is arithmetic shift. */
+/* Assumption: Endianness is little or big (not mixed). */
 
-#include <limits.h>
-#include <stdint.h>
-#include "endianness.h"
-#include <math.h>
+/* ABI macro definitions */
 
-/* If compiling for kernel use, call panic() instead of abort(). */
-#ifdef KERNEL_USE
-extern void panic (const char *, ...);
-#define compilerrt_abort() \
-  panic("%s:%d: abort in %s", __FILE__, __LINE__, __FUNCTION__)
+#if __ARM_EABI__
+# define ARM_EABI_FNALIAS(aeabi_name, name)         \
+  void __aeabi_##aeabi_name() __attribute__((alias("__" #name)));
+# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
 #else
-#define compilerrt_abort() abort()
+# define ARM_EABI_FNALIAS(aeabi_name, name)
+# define COMPILER_RT_ABI
 #endif
 
-#if !defined(INFINITY) && defined(HUGE_VAL)
-#define INFINITY HUGE_VAL
-#endif /* INFINITY */
+/* Include the standard compiler builtin headers we use functionality from. */
+#include <limits.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <float.h>
 
-typedef      int si_int;
-typedef unsigned su_int;
+/* Include the commonly used internal type definitions. */
+#include "int_types.h"
 
-typedef          long long di_int;
-typedef unsigned long long du_int;
-
-typedef union
-{
-    di_int all;
-    struct
-    {
-#if _YUGA_LITTLE_ENDIAN
-        su_int low;
-        si_int high;
-#else
-        si_int high;
-        su_int low;
-#endif /* _YUGA_LITTLE_ENDIAN */
-    }s;
-} dwords;
-
-typedef union
-{
-    du_int all;
-    struct
-    {
-#if _YUGA_LITTLE_ENDIAN
-        su_int low;
-        su_int high;
-#else
-        su_int high;
-        su_int low;
-#endif /* _YUGA_LITTLE_ENDIAN */
-    }s;
-} udwords;
-
-#if __x86_64
-
-typedef int      ti_int __attribute__ ((mode (TI)));
-typedef unsigned tu_int __attribute__ ((mode (TI)));
-
-typedef union
-{
-    ti_int all;
-    struct
-    {
-#if _YUGA_LITTLE_ENDIAN
-        du_int low;
-        di_int high;
-#else
-        di_int high;
-        du_int low;
-#endif /* _YUGA_LITTLE_ENDIAN */
-    }s;
-} twords;
-
-typedef union
-{
-    tu_int all;
-    struct
-    {
-#if _YUGA_LITTLE_ENDIAN
-        du_int low;
-        du_int high;
-#else
-        du_int high;
-        du_int low;
-#endif /* _YUGA_LITTLE_ENDIAN */
-    }s;
-} utwords;
-
-static inline ti_int make_ti(di_int h, di_int l) {
-    twords r;
-    r.s.high = h;
-    r.s.low = l;
-    return r.all;
-}
-
-static inline tu_int make_tu(du_int h, du_int l) {
-    utwords r;
-    r.s.high = h;
-    r.s.low = l;
-    return r.all;
-}
-
-#endif /* __x86_64 */
-
-typedef union
-{
-    su_int u;
-    float f;
-} float_bits;
-
-typedef union
-{
-    udwords u;
-    double  f;
-} double_bits;
-
-typedef struct
-{
-#if _YUGA_LITTLE_ENDIAN
-    udwords low;
-    udwords high;
-#else
-    udwords high;
-    udwords low;
-#endif /* _YUGA_LITTLE_ENDIAN */
-} uqwords;
-
-typedef union
-{
-    uqwords     u;
-    long double f;
-} long_double_bits;
+/* Include internal utility function declarations. */
+#include "int_util.h"
 
 #endif /* INT_LIB_H */
diff --git a/lib/int_math.h b/lib/int_math.h
new file mode 100644
index 0000000..d6b4bda
--- /dev/null
+++ b/lib/int_math.h
@@ -0,0 +1,67 @@
+/* ===-- int_math.h - internal math inlines ---------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This file is not part of the interface of this library.
+ *
+ * This file defines substitutes for the libm functions used in some of the
+ * compiler-rt implementations, defined in such a way that there is not a direct
+ * dependency on libm or math.h. Instead, we use the compiler builtin versions
+ * where available. This reduces our dependencies on the system SDK by foisting
+ * the responsibility onto the compiler.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef INT_MATH_H
+#define INT_MATH_H
+
+#ifndef __has_builtin
+#  define  __has_builtin(x) 0
+#endif
+
+#define CRT_INFINITY __builtin_huge_valf()
+
+#define crt_isinf(x) __builtin_isinf((x))
+#define crt_isnan(x) __builtin_isnan((x))
+
+/* Define crt_isfinite in terms of the builtin if available, otherwise provide
+ * an alternate version in terms of our other functions. This supports some
+ * versions of GCC which didn't have __builtin_isfinite.
+ */
+#if __has_builtin(__builtin_isfinite)
+#  define crt_isfinite(x) __builtin_isfinite((x))
+#else
+#  define crt_isfinite(x) \
+  __extension__(({ \
+      __typeof((x)) x_ = (x); \
+      !crt_isinf(x_) && !crt_isnan(x_); \
+    }))
+#endif
+
+#define crt_copysign(x, y) __builtin_copysign((x), (y))
+#define crt_copysignf(x, y) __builtin_copysignf((x), (y))
+#define crt_copysignl(x, y) __builtin_copysignl((x), (y))
+
+#define crt_fabs(x) __builtin_fabs((x))
+#define crt_fabsf(x) __builtin_fabsf((x))
+#define crt_fabsl(x) __builtin_fabsl((x))
+
+#define crt_fmax(x, y) __builtin_fmax((x), (y))
+#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y))
+#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y))
+
+#define crt_logb(x) __builtin_logb((x))
+#define crt_logbf(x) __builtin_logbf((x))
+#define crt_logbl(x) __builtin_logbl((x))
+
+#define crt_scalbn(x, y) __builtin_scalbn((x), (y))
+#define crt_scalbnf(x, y) __builtin_scalbnf((x), (y))
+#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y))
+
+#endif /* INT_MATH_H */
diff --git a/lib/int_types.h b/lib/int_types.h
new file mode 100644
index 0000000..fcce390
--- /dev/null
+++ b/lib/int_types.h
@@ -0,0 +1,140 @@
+/* ===-- int_types.h - internal type definitions for compiler-rt -----------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file is not part of the interface of this library.
+ *
+ * This file defines various standard types, most importantly a number of unions
+ * used to access parts of larger types.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+#include "int_endianness.h"
+
+typedef      int si_int;
+typedef unsigned su_int;
+
+typedef          long long di_int;
+typedef unsigned long long du_int;
+
+typedef union
+{
+    di_int all;
+    struct
+    {
+#if _YUGA_LITTLE_ENDIAN
+        su_int low;
+        si_int high;
+#else
+        si_int high;
+        su_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+    }s;
+} dwords;
+
+typedef union
+{
+    du_int all;
+    struct
+    {
+#if _YUGA_LITTLE_ENDIAN
+        su_int low;
+        su_int high;
+#else
+        su_int high;
+        su_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+    }s;
+} udwords;
+
+#if __x86_64
+
+typedef int      ti_int __attribute__ ((mode (TI)));
+typedef unsigned tu_int __attribute__ ((mode (TI)));
+
+typedef union
+{
+    ti_int all;
+    struct
+    {
+#if _YUGA_LITTLE_ENDIAN
+        du_int low;
+        di_int high;
+#else
+        di_int high;
+        du_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+    }s;
+} twords;
+
+typedef union
+{
+    tu_int all;
+    struct
+    {
+#if _YUGA_LITTLE_ENDIAN
+        du_int low;
+        du_int high;
+#else
+        du_int high;
+        du_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+    }s;
+} utwords;
+
+static inline ti_int make_ti(di_int h, di_int l) {
+    twords r;
+    r.s.high = h;
+    r.s.low = l;
+    return r.all;
+}
+
+static inline tu_int make_tu(du_int h, du_int l) {
+    utwords r;
+    r.s.high = h;
+    r.s.low = l;
+    return r.all;
+}
+
+#endif /* __x86_64 */
+
+typedef union
+{
+    su_int u;
+    float f;
+} float_bits;
+
+typedef union
+{
+    udwords u;
+    double  f;
+} double_bits;
+
+typedef struct
+{
+#if _YUGA_LITTLE_ENDIAN
+    udwords low;
+    udwords high;
+#else
+    udwords high;
+    udwords low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+} uqwords;
+
+typedef union
+{
+    uqwords     u;
+    long double f;
+} long_double_bits;
+
+#endif /* INT_TYPES_H */
+
diff --git a/lib/int_util.c b/lib/int_util.c
new file mode 100644
index 0000000..871d191
--- /dev/null
+++ b/lib/int_util.c
@@ -0,0 +1,56 @@
+/* ===-- int_util.c - Implement internal utilities --------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_util.h"
+#include "int_lib.h"
+
+/* NOTE: The definitions in this file are declared weak because we want clients
+ * to be able to arbitrarily package individual functions into separate .a
+ * files. If we did not declare these weak, some link situations might end up
+ * seeing duplicate strong definitions of the same symbol.
+ *
+ * We can't use this solution for kernel use (which may not support weak), but
+ * currently expect that when built for kernel use all the functionality is
+ * packaged into a single library.
+ */
+
+#ifdef KERNEL_USE
+
+extern void panic(const char *, ...) __attribute__((noreturn));
+__attribute__((visibility("hidden")))
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+  panic("%s:%d: abort in %s", file, line, function);
+}
+
+#elif __APPLE__ && !__STATIC__
+
+/* from libSystem.dylib */
+extern void __assert_rtn(const char *func, const char *file, 
+                     int line, const char * message) __attribute__((noreturn));
+
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+  __assert_rtn(function, file, line, "libcompiler_rt abort");
+}
+
+
+#else
+
+/* Get the system definition of abort() */
+#include <stdlib.h>
+
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+  abort();
+}
+
+#endif
diff --git a/lib/int_util.h b/lib/int_util.h
new file mode 100644
index 0000000..1348b85
--- /dev/null
+++ b/lib/int_util.h
@@ -0,0 +1,29 @@
+/* ===-- int_util.h - internal utility functions ----------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This file is not part of the interface of this library.
+ *
+ * This file defines non-inline utilities which are available for use in the
+ * library. The function definitions themselves are all contained in int_util.c
+ * which will always be compiled into any compiler-rt library.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef INT_UTIL_H
+#define INT_UTIL_H
+
+/** \brief Trigger a program abort (or panic for kernel code). */
+#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, \
+                                                 __FUNCTION__)
+
+void compilerrt_abort_impl(const char *file, int line,
+                           const char *function) __attribute__((noreturn));
+
+#endif /* INT_UTIL_H */
diff --git a/lib/lshrdi3.c b/lib/lshrdi3.c
index 911edb1..8af3e0c 100644
--- a/lib/lshrdi3.c
+++ b/lib/lshrdi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/moddi3.c b/lib/moddi3.c
index af0a808..2f3b9cc 100644
--- a/lib/moddi3.c
+++ b/lib/moddi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/modsi3.c b/lib/modsi3.c
index 05ce806..d16213c 100644
--- a/lib/modsi3.c
+++ b/lib/modsi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/muldc3.c b/lib/muldc3.c
index 9f9bd2a..5f4a6d1 100644
--- a/lib/muldc3.c
+++ b/lib/muldc3.c
@@ -13,8 +13,7 @@
  */
 
 #include "int_lib.h"
-#include <math.h>
-#include <complex.h>
+#include "int_math.h"
 
 /* Returns: the product of a + ib and c + id */
 
@@ -28,46 +27,46 @@
     double _Complex z;
     __real__ z = __ac - __bd;
     __imag__ z = __ad + __bc;
-    if (isnan(__real__ z) && isnan(__imag__ z))
+    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
     {
         int __recalc = 0;
-        if (isinf(__a) || isinf(__b))
+        if (crt_isinf(__a) || crt_isinf(__b))
         {
-            __a = copysign(isinf(__a) ? 1 : 0, __a);
-            __b = copysign(isinf(__b) ? 1 : 0, __b);
-            if (isnan(__c))
-                __c = copysign(0, __c);
-            if (isnan(__d))
-                __d = copysign(0, __d);
+            __a = crt_copysign(crt_isinf(__a) ? 1 : 0, __a);
+            __b = crt_copysign(crt_isinf(__b) ? 1 : 0, __b);
+            if (crt_isnan(__c))
+                __c = crt_copysign(0, __c);
+            if (crt_isnan(__d))
+                __d = crt_copysign(0, __d);
             __recalc = 1;
         }
-        if (isinf(__c) || isinf(__d))
+        if (crt_isinf(__c) || crt_isinf(__d))
         {
-            __c = copysign(isinf(__c) ? 1 : 0, __c);
-            __d = copysign(isinf(__d) ? 1 : 0, __d);
-            if (isnan(__a))
-                __a = copysign(0, __a);
-            if (isnan(__b))
-                __b = copysign(0, __b);
+            __c = crt_copysign(crt_isinf(__c) ? 1 : 0, __c);
+            __d = crt_copysign(crt_isinf(__d) ? 1 : 0, __d);
+            if (crt_isnan(__a))
+                __a = crt_copysign(0, __a);
+            if (crt_isnan(__b))
+                __b = crt_copysign(0, __b);
             __recalc = 1;
         }
-        if (!__recalc && (isinf(__ac) || isinf(__bd) ||
-                          isinf(__ad) || isinf(__bc)))
+        if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
+                          crt_isinf(__ad) || crt_isinf(__bc)))
         {
-            if (isnan(__a))
-                __a = copysign(0, __a);
-            if (isnan(__b))
-                __b = copysign(0, __b);
-            if (isnan(__c))
-                __c = copysign(0, __c);
-            if (isnan(__d))
-                __d = copysign(0, __d);
+            if (crt_isnan(__a))
+                __a = crt_copysign(0, __a);
+            if (crt_isnan(__b))
+                __b = crt_copysign(0, __b);
+            if (crt_isnan(__c))
+                __c = crt_copysign(0, __c);
+            if (crt_isnan(__d))
+                __d = crt_copysign(0, __d);
             __recalc = 1;
         }
         if (__recalc)
         {
-            __real__ z = INFINITY * (__a * __c - __b * __d);
-            __imag__ z = INFINITY * (__a * __d + __b * __c);
+            __real__ z = CRT_INFINITY * (__a * __c - __b * __d);
+            __imag__ z = CRT_INFINITY * (__a * __d + __b * __c);
         }
     }
     return z;
diff --git a/lib/muldf3.c b/lib/muldf3.c
index f402cfb..86d72d8 100644
--- a/lib/muldf3.c
+++ b/lib/muldf3.c
@@ -11,7 +11,6 @@
 // with the IEEE-754 default rounding (to nearest, ties to even).
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/muldi3.c b/lib/muldi3.c
index e6322bf..3e99630 100644
--- a/lib/muldi3.c
+++ b/lib/muldi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/mulodi4.c b/lib/mulodi4.c
new file mode 100644
index 0000000..0c1b5cd
--- /dev/null
+++ b/lib/mulodi4.c
@@ -0,0 +1,58 @@
+/*===-- mulodi4.c - Implement __mulodi4 -----------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulodi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b, wrapped to the width of di_int (two's complement).
+ *
+ * Effects: sets *overflow to 1 if the exact product does not fit in di_int,
+ * and to 0 otherwise.
+ */
+
+di_int
+__mulodi4(di_int a, di_int b, int* overflow)
+{
+    const int N = (int)(sizeof(di_int) * CHAR_BIT);
+    /* Shift as unsigned: moving a 1 into a signed sign bit is undefined. */
+    const di_int MIN = (di_int)((du_int)1 << (N-1));
+    const di_int MAX = ~MIN;
+    *overflow = 0;
+    /* Multiply as unsigned so wraparound is defined (signed overflow is UB). */
+    di_int result = (di_int)((du_int)a * (du_int)b);
+    /* MIN has no positive counterpart: MIN * x fits only for x == 0 or 1. */
+    if (a == MIN)
+    {
+        if (b != 0 && b != 1)
+            *overflow = 1;
+        return result;
+    }
+    if (b == MIN)
+    {
+        if (a != 0 && a != 1)
+            *overflow = 1;
+        return result;
+    }
+    /* sa and sb are 0 or -1 (arithmetic shift), so (x ^ s) - s is |x|. */
+    di_int sa = a >> (N - 1);
+    di_int abs_a = (a ^ sa) - sa;
+    di_int sb = b >> (N - 1);
+    di_int abs_b = (b ^ sb) - sb;
+    /* A factor of magnitude 0 or 1 can never overflow. */
+    if (abs_a < 2 || abs_b < 2)
+        return result;
+    /* Positive products are bounded by MAX, negative ones by MIN. */
+    if (sa == sb ? abs_a > MAX / abs_b : abs_a > MIN / -abs_b)
+        *overflow = 1;
+    return result;
+}
diff --git a/lib/mulosi4.c b/lib/mulosi4.c
new file mode 100644
index 0000000..f3398d1
--- /dev/null
+++ b/lib/mulosi4.c
@@ -0,0 +1,58 @@
+/*===-- mulosi4.c - Implement __mulosi4 -----------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulosi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b, wrapped to the width of si_int (two's complement).
+ *
+ * Effects: sets *overflow to 1 if the exact product does not fit in si_int,
+ * and to 0 otherwise.
+ */
+
+si_int
+__mulosi4(si_int a, si_int b, int* overflow)
+{
+    const int N = (int)(sizeof(si_int) * CHAR_BIT);
+    /* Shift as unsigned: moving a 1 into a signed sign bit is undefined. */
+    const si_int MIN = (si_int)((su_int)1 << (N-1));
+    const si_int MAX = ~MIN;
+    *overflow = 0;
+    /* Multiply as unsigned so wraparound is defined (signed overflow is UB). */
+    si_int result = (si_int)((su_int)a * (su_int)b);
+    /* MIN has no positive counterpart: MIN * x fits only for x == 0 or 1. */
+    if (a == MIN)
+    {
+        if (b != 0 && b != 1)
+            *overflow = 1;
+        return result;
+    }
+    if (b == MIN)
+    {
+        if (a != 0 && a != 1)
+            *overflow = 1;
+        return result;
+    }
+    /* sa and sb are 0 or -1 (arithmetic shift), so (x ^ s) - s is |x|. */
+    si_int sa = a >> (N - 1);
+    si_int abs_a = (a ^ sa) - sa;
+    si_int sb = b >> (N - 1);
+    si_int abs_b = (b ^ sb) - sb;
+    /* A factor of magnitude 0 or 1 can never overflow. */
+    if (abs_a < 2 || abs_b < 2)
+        return result;
+    /* Positive products are bounded by MAX, negative ones by MIN. */
+    if (sa == sb ? abs_a > MAX / abs_b : abs_a > MIN / -abs_b)
+        *overflow = 1;
+    return result;
+}
diff --git a/lib/muloti4.c b/lib/muloti4.c
new file mode 100644
index 0000000..1fcd0ba
--- /dev/null
+++ b/lib/muloti4.c
@@ -0,0 +1,62 @@
+/*===-- muloti4.c - Implement __muloti4 -----------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __muloti4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if __x86_64
+
+#include "int_lib.h"
+
+/* Returns: a * b, wrapped to the width of ti_int (two's complement).
+ *
+ * Effects: sets *overflow to 1 if the exact product does not fit in ti_int,
+ * and to 0 otherwise.
+ */
+
+ti_int
+__muloti4(ti_int a, ti_int b, int* overflow)
+{
+    const int N = (int)(sizeof(ti_int) * CHAR_BIT);
+    /* Shift as unsigned: moving a 1 into a signed sign bit is undefined. */
+    const ti_int MIN = (ti_int)((tu_int)1 << (N-1));
+    const ti_int MAX = ~MIN;
+    *overflow = 0;
+    /* Multiply as unsigned so wraparound is defined (signed overflow is UB). */
+    ti_int result = (ti_int)((tu_int)a * (tu_int)b);
+    /* MIN has no positive counterpart: MIN * x fits only for x == 0 or 1. */
+    if (a == MIN)
+    {
+        if (b != 0 && b != 1)
+            *overflow = 1;
+        return result;
+    }
+    if (b == MIN)
+    {
+        if (a != 0 && a != 1)
+            *overflow = 1;
+        return result;
+    }
+    /* sa and sb are 0 or -1 (arithmetic shift), so (x ^ s) - s is |x|. */
+    ti_int sa = a >> (N - 1);
+    ti_int abs_a = (a ^ sa) - sa;
+    ti_int sb = b >> (N - 1);
+    ti_int abs_b = (b ^ sb) - sb;
+    /* A factor of magnitude 0 or 1 can never overflow. */
+    if (abs_a < 2 || abs_b < 2)
+        return result;
+    /* Positive products are bounded by MAX, negative ones by MIN. */
+    if (sa == sb ? abs_a > MAX / abs_b : abs_a > MIN / -abs_b)
+        *overflow = 1;
+    return result;
+}
+
+#endif
diff --git a/lib/mulsc3.c b/lib/mulsc3.c
index a878ba1..6d433fb 100644
--- a/lib/mulsc3.c
+++ b/lib/mulsc3.c
@@ -13,8 +13,7 @@
  */
 
 #include "int_lib.h"
-#include <math.h>
-#include <complex.h>
+#include "int_math.h"
 
 /* Returns: the product of a + ib and c + id */
 
@@ -28,46 +27,46 @@
     float _Complex z;
     __real__ z = __ac - __bd;
     __imag__ z = __ad + __bc;
-    if (isnan(__real__ z) && isnan(__imag__ z))
+    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
     {
         int __recalc = 0;
-        if (isinf(__a) || isinf(__b))
+        if (crt_isinf(__a) || crt_isinf(__b))
         {
-            __a = copysignf(isinf(__a) ? 1 : 0, __a);
-            __b = copysignf(isinf(__b) ? 1 : 0, __b);
-            if (isnan(__c))
-                __c = copysignf(0, __c);
-            if (isnan(__d))
-                __d = copysignf(0, __d);
+            __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
+            __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
+            if (crt_isnan(__c))
+                __c = crt_copysignf(0, __c);
+            if (crt_isnan(__d))
+                __d = crt_copysignf(0, __d);
             __recalc = 1;
         }
-        if (isinf(__c) || isinf(__d))
+        if (crt_isinf(__c) || crt_isinf(__d))
         {
-            __c = copysignf(isinf(__c) ? 1 : 0, __c);
-            __d = copysignf(isinf(__d) ? 1 : 0, __d);
-            if (isnan(__a))
-                __a = copysignf(0, __a);
-            if (isnan(__b))
-                __b = copysignf(0, __b);
+            __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
+            __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
+            if (crt_isnan(__a))
+                __a = crt_copysignf(0, __a);
+            if (crt_isnan(__b))
+                __b = crt_copysignf(0, __b);
             __recalc = 1;
         }
-        if (!__recalc && (isinf(__ac) || isinf(__bd) ||
-                          isinf(__ad) || isinf(__bc)))
+        if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
+                          crt_isinf(__ad) || crt_isinf(__bc)))
         {
-            if (isnan(__a))
-                __a = copysignf(0, __a);
-            if (isnan(__b))
-                __b = copysignf(0, __b);
-            if (isnan(__c))
-                __c = copysignf(0, __c);
-            if (isnan(__d))
-                __d = copysignf(0, __d);
+            if (crt_isnan(__a))
+                __a = crt_copysignf(0, __a);
+            if (crt_isnan(__b))
+                __b = crt_copysignf(0, __b);
+            if (crt_isnan(__c))
+                __c = crt_copysignf(0, __c);
+            if (crt_isnan(__d))
+                __d = crt_copysignf(0, __d);
             __recalc = 1;
         }
         if (__recalc)
         {
-            __real__ z = INFINITY * (__a * __c - __b * __d);
-            __imag__ z = INFINITY * (__a * __d + __b * __c);
+            __real__ z = CRT_INFINITY * (__a * __c - __b * __d);
+            __imag__ z = CRT_INFINITY * (__a * __d + __b * __c);
         }
     }
     return z;
diff --git a/lib/mulsf3.c b/lib/mulsf3.c
index bf46e14..fce2fd4 100644
--- a/lib/mulsf3.c
+++ b/lib/mulsf3.c
@@ -11,7 +11,6 @@
 // with the IEEE-754 default rounding (to nearest, ties to even).
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define SINGLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/mulvdi3.c b/lib/mulvdi3.c
index fcbb5b3..bcc8e65 100644
--- a/lib/mulvdi3.c
+++ b/lib/mulvdi3.c
@@ -13,7 +13,6 @@
  */
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: a * b */
 
diff --git a/lib/mulvsi3.c b/lib/mulvsi3.c
index 6271cd4..d372b20 100644
--- a/lib/mulvsi3.c
+++ b/lib/mulvsi3.c
@@ -13,7 +13,6 @@
  */
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: a * b */
 
diff --git a/lib/mulvti3.c b/lib/mulvti3.c
index 7da9187..ae65cf8 100644
--- a/lib/mulvti3.c
+++ b/lib/mulvti3.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: a * b */
 
diff --git a/lib/mulxc3.c b/lib/mulxc3.c
index b5ae865..cec0573 100644
--- a/lib/mulxc3.c
+++ b/lib/mulxc3.c
@@ -15,8 +15,7 @@
 #if !_ARCH_PPC
 
 #include "int_lib.h"
-#include <math.h>
-#include <complex.h>
+#include "int_math.h"
 
 /* Returns: the product of a + ib and c + id */
 
@@ -30,46 +29,46 @@
     long double _Complex z;
     __real__ z = __ac - __bd;
     __imag__ z = __ad + __bc;
-    if (isnan(__real__ z) && isnan(__imag__ z))
+    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
     {
         int __recalc = 0;
-        if (isinf(__a) || isinf(__b))
+        if (crt_isinf(__a) || crt_isinf(__b))
         {
-            __a = copysignl(isinf(__a) ? 1 : 0, __a);
-            __b = copysignl(isinf(__b) ? 1 : 0, __b);
-            if (isnan(__c))
-                __c = copysignl(0, __c);
-            if (isnan(__d))
-                __d = copysignl(0, __d);
+            __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
+            __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
+            if (crt_isnan(__c))
+                __c = crt_copysignl(0, __c);
+            if (crt_isnan(__d))
+                __d = crt_copysignl(0, __d);
             __recalc = 1;
         }
-        if (isinf(__c) || isinf(__d))
+        if (crt_isinf(__c) || crt_isinf(__d))
         {
-            __c = copysignl(isinf(__c) ? 1 : 0, __c);
-            __d = copysignl(isinf(__d) ? 1 : 0, __d);
-            if (isnan(__a))
-                __a = copysignl(0, __a);
-            if (isnan(__b))
-                __b = copysignl(0, __b);
+            __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
+            __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
+            if (crt_isnan(__a))
+                __a = crt_copysignl(0, __a);
+            if (crt_isnan(__b))
+                __b = crt_copysignl(0, __b);
             __recalc = 1;
         }
-        if (!__recalc && (isinf(__ac) || isinf(__bd) ||
-                          isinf(__ad) || isinf(__bc)))
+        if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
+                          crt_isinf(__ad) || crt_isinf(__bc)))
         {
-            if (isnan(__a))
-                __a = copysignl(0, __a);
-            if (isnan(__b))
-                __b = copysignl(0, __b);
-            if (isnan(__c))
-                __c = copysignl(0, __c);
-            if (isnan(__d))
-                __d = copysignl(0, __d);
+            if (crt_isnan(__a))
+                __a = crt_copysignl(0, __a);
+            if (crt_isnan(__b))
+                __b = crt_copysignl(0, __b);
+            if (crt_isnan(__c))
+                __c = crt_copysignl(0, __c);
+            if (crt_isnan(__d))
+                __d = crt_copysignl(0, __d);
             __recalc = 1;
         }
         if (__recalc)
         {
-            __real__ z = INFINITY * (__a * __c - __b * __d);
-            __imag__ z = INFINITY * (__a * __d + __b * __c);
+            __real__ z = CRT_INFINITY * (__a * __c - __b * __d);
+            __imag__ z = CRT_INFINITY * (__a * __d + __b * __c);
         }
     }
     return z;
diff --git a/lib/negdf2.c b/lib/negdf2.c
index b47f397..b11b480 100644
--- a/lib/negdf2.c
+++ b/lib/negdf2.c
@@ -10,7 +10,6 @@
 // This file implements double-precision soft-float negation.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/negsf2.c b/lib/negsf2.c
index 98f9fc0..f8ef2d1 100644
--- a/lib/negsf2.c
+++ b/lib/negsf2.c
@@ -10,7 +10,6 @@
 // This file implements single-precision soft-float negation.
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define SINGLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/negvdi2.c b/lib/negvdi2.c
index aafaa9d..e336ecf 100644
--- a/lib/negvdi2.c
+++ b/lib/negvdi2.c
@@ -11,10 +11,8 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: -a */
 
diff --git a/lib/negvsi2.c b/lib/negvsi2.c
index 559ea18..b9e93fe 100644
--- a/lib/negvsi2.c
+++ b/lib/negvsi2.c
@@ -11,10 +11,8 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: -a */
 
diff --git a/lib/negvti2.c b/lib/negvti2.c
index d931305..ef766bb 100644
--- a/lib/negvti2.c
+++ b/lib/negvti2.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: -a */
 
diff --git a/lib/paritydi2.c b/lib/paritydi2.c
index e7bebf6..2ded54c 100644
--- a/lib/paritydi2.c
+++ b/lib/paritydi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/paritysi2.c b/lib/paritysi2.c
index 64d509f..5999846 100644
--- a/lib/paritysi2.c
+++ b/lib/paritysi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/popcountdi2.c b/lib/popcountdi2.c
index 136fc04..5e8a62f 100644
--- a/lib/popcountdi2.c
+++ b/lib/popcountdi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/popcountsi2.c b/lib/popcountsi2.c
index bfaa3ff..44544ff 100644
--- a/lib/popcountsi2.c
+++ b/lib/popcountsi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/powidf2.c b/lib/powidf2.c
index 2e211eb..ac13b17 100644
--- a/lib/powidf2.c
+++ b/lib/powidf2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/powisf2.c b/lib/powisf2.c
index e6b43b3..0c400ec 100644
--- a/lib/powisf2.c
+++ b/lib/powisf2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/ppc/CMakeLists.txt b/lib/ppc/CMakeLists.txt
deleted file mode 100644
index fb0fcd5..0000000
--- a/lib/ppc/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-SET( SRCS
- fixtfdi.c
- gcc_qdiv.c
- gcc_qmul.c
- divtc3.c
- gcc_qsub.c
- multc3.c
- floatditf.c
- gcc_qadd.c
- fixunstfdi.c
- floatunditf.c
- )
diff --git a/lib/ppc/DD.h b/lib/ppc/DD.h
index 9ecd1f5..13862dc 100644
--- a/lib/ppc/DD.h
+++ b/lib/ppc/DD.h
@@ -1,7 +1,7 @@
 #ifndef __DD_HEADER
 #define __DD_HEADER
 
-#include <stdint.h>
+#include "../int_lib.h"
 
 typedef union {
 	long double ld;
diff --git a/lib/ppc/Makefile.mk b/lib/ppc/Makefile.mk
index 519d654..b78d386 100644
--- a/lib/ppc/Makefile.mk
+++ b/lib/ppc/Makefile.mk
@@ -7,6 +7,7 @@
 #
 #===------------------------------------------------------------------------===#
 
+ModuleName := builtins
 SubDirs := 
 OnlyArchs := ppc
 
diff --git a/lib/ppc/divtc3.c b/lib/ppc/divtc3.c
index d41f621..2991281 100644
--- a/lib/ppc/divtc3.c
+++ b/lib/ppc/divtc3.c
@@ -3,16 +3,16 @@
  */
 
 #include "DD.h"
-#include <math.h>
+#include "../int_math.h"
 
-#if !defined(INFINITY) && defined(HUGE_VAL)
-#define INFINITY HUGE_VAL
-#endif /* INFINITY */
+#if !defined(CRT_INFINITY) && defined(HUGE_VAL)
+#define CRT_INFINITY HUGE_VAL
+#endif /* CRT_INFINITY */
 
-#define makeFinite(x)	{ \
-							(x).s.hi = __builtin_copysign(isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
-							(x).s.lo = 0.0; \
-						}
+#define makeFinite(x) { \
+    (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
+    (x).s.lo = 0.0;                                                     \
+  }
 
 long double __gcc_qadd(long double, long double);
 long double __gcc_qsub(long double, long double);
@@ -26,16 +26,16 @@
 	DD dDD = { .ld = d };
 	
 	int ilogbw = 0;
-	const double logbw = logb(__builtin_fmax( __builtin_fabs(cDD.s.hi), __builtin_fabs(dDD.s.hi) ));
+	const double logbw = crt_logb(crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi) ));
 	
-	if (isfinite(logbw))
+	if (crt_isfinite(logbw))
 	{
 		ilogbw = (int)logbw;
 		
-		cDD.s.hi = scalbn(cDD.s.hi, -ilogbw);
-		cDD.s.lo = scalbn(cDD.s.lo, -ilogbw);
-		dDD.s.hi = scalbn(dDD.s.hi, -ilogbw);
-		dDD.s.lo = scalbn(dDD.s.lo, -ilogbw);
+		cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw);
+		cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw);
+		dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw);
+		dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw);
 	}
 	
 	const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld));
@@ -45,42 +45,45 @@
 	DD real = { .ld = __gcc_qdiv(realNumerator, denom) };
 	DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) };
 	
-	real.s.hi = scalbn(real.s.hi, -ilogbw);
-	real.s.lo = scalbn(real.s.lo, -ilogbw);
-	imag.s.hi = scalbn(imag.s.hi, -ilogbw);
-	imag.s.lo = scalbn(imag.s.lo, -ilogbw);
+	real.s.hi = crt_scalbn(real.s.hi, -ilogbw);
+	real.s.lo = crt_scalbn(real.s.lo, -ilogbw);
+	imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw);
+	imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw);
 	
-	if (isnan(real.s.hi) && isnan(imag.s.hi))
+	if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi))
 	{
 		DD aDD = { .ld = a };
 		DD bDD = { .ld = b };
 		DD rDD = { .ld = denom };
 		
-		if ((rDD.s.hi == 0.0) && (!isnan(aDD.s.hi) || !isnan(bDD.s.hi)))
+		if ((rDD.s.hi == 0.0) && (!crt_isnan(aDD.s.hi) ||
+                                          !crt_isnan(bDD.s.hi)))
 		{
-			real.s.hi = __builtin_copysign(INFINITY,cDD.s.hi) * aDD.s.hi;
+			real.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * aDD.s.hi;
 			real.s.lo = 0.0;
-			imag.s.hi = __builtin_copysign(INFINITY,cDD.s.hi) * bDD.s.hi;
+			imag.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * bDD.s.hi;
 			imag.s.lo = 0.0;
 		}
 		
-		else if ((isinf(aDD.s.hi) || isinf(bDD.s.hi)) && isfinite(cDD.s.hi) && isfinite(dDD.s.hi))
+		else if ((crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) &&
+                         crt_isfinite(cDD.s.hi) && crt_isfinite(dDD.s.hi))
 		{
 			makeFinite(aDD);
 			makeFinite(bDD);
-			real.s.hi = INFINITY * (aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi);
+			real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi);
 			real.s.lo = 0.0;
-			imag.s.hi = INFINITY * (bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi);
+			imag.s.hi = CRT_INFINITY * (bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi);
 			imag.s.lo = 0.0;
 		}
 		
-		else if ((isinf(cDD.s.hi) || isinf(dDD.s.hi)) && isfinite(aDD.s.hi) && isfinite(bDD.s.hi))
+		else if ((crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) &&
+                         crt_isfinite(aDD.s.hi) && crt_isfinite(bDD.s.hi))
 		{
 			makeFinite(cDD);
 			makeFinite(dDD);
-			real.s.hi = __builtin_copysign(0.0,(aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi));
+			real.s.hi = crt_copysign(0.0,(aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi));
 			real.s.lo = 0.0;
-			imag.s.hi = __builtin_copysign(0.0,(bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi));
+			imag.s.hi = crt_copysign(0.0,(bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi));
 			imag.s.lo = 0.0;
 		}
 	}
diff --git a/lib/ppc/fixtfdi.c b/lib/ppc/fixtfdi.c
index fa113a0..56e7b3f 100644
--- a/lib/ppc/fixtfdi.c
+++ b/lib/ppc/fixtfdi.c
@@ -7,7 +7,7 @@
  */
 
 #include "DD.h"
-#include <stdint.h>
+#include "../int_math.h"
 
 uint64_t __fixtfdi(long double input)
 {
@@ -65,7 +65,7 @@
 	/* Edge cases handled here: */
 	
 	/* |x| < 1, result is zero. */
-	if (1.0 > __builtin_fabs(x.s.hi))
+	if (1.0 > crt_fabs(x.s.hi))
 		return INT64_C(0);
 	
 	/* x very close to INT64_MIN, care must be taken to see which side we are on. */
diff --git a/lib/ppc/fixunstfdi.c b/lib/ppc/fixunstfdi.c
index 1fb5248..5e6e2ce 100644
--- a/lib/ppc/fixunstfdi.c
+++ b/lib/ppc/fixunstfdi.c
@@ -6,7 +6,6 @@
 /* This file implements the PowerPC 128-bit double-double -> uint64_t conversion */
 
 #include "DD.h"
-#include <stdint.h>
 
 uint64_t __fixunstfdi(long double input)
 {
diff --git a/lib/ppc/floatditf.c b/lib/ppc/floatditf.c
index ed23dc8..beabdd01 100644
--- a/lib/ppc/floatditf.c
+++ b/lib/ppc/floatditf.c
@@ -6,7 +6,6 @@
 /* This file implements the PowerPC long long -> long double conversion */
 
 #include "DD.h"
-#include <stdint.h>
 
 long double __floatditf(int64_t a) {
 	
diff --git a/lib/ppc/floatunditf.c b/lib/ppc/floatunditf.c
index 20a3b71..b12e1e7 100644
--- a/lib/ppc/floatunditf.c
+++ b/lib/ppc/floatunditf.c
@@ -6,7 +6,6 @@
 /* This file implements the PowerPC unsigned long long -> long double conversion */
 
 #include "DD.h"
-#include <stdint.h>
 
 long double __floatunditf(uint64_t a) {
 	
diff --git a/lib/ppc/multc3.c b/lib/ppc/multc3.c
index 9d17a2c..738b65a 100644
--- a/lib/ppc/multc3.c
+++ b/lib/ppc/multc3.c
@@ -3,23 +3,19 @@
  */
 
 #include "DD.h"
-#include <math.h>
+#include "../int_math.h"
 
-#if !defined(INFINITY) && defined(HUGE_VAL)
-#define INFINITY HUGE_VAL
-#endif /* INFINITY */
+#define makeFinite(x) { \
+    (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
+    (x).s.lo = 0.0;                                                     \
+  }
 
-#define makeFinite(x)	{ \
-							(x).s.hi = __builtin_copysign(isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
-							(x).s.lo = 0.0; \
-						}
-
-#define zeroNaN(x)		{ \
-							if (isnan((x).s.hi)) { \
-								(x).s.hi = __builtin_copysign(0.0, (x).s.hi); \
-								(x).s.lo = 0.0; \
-							} \
-						}
+#define zeroNaN(x) { \
+    if (crt_isnan((x).s.hi)) {                                          \
+      (x).s.hi = crt_copysign(0.0, (x).s.hi);                     \
+      (x).s.lo = 0.0;                                                   \
+    }                                                                   \
+  }
 
 long double __gcc_qadd(long double, long double);
 long double __gcc_qsub(long double, long double);
@@ -36,7 +32,7 @@
 	DD real = { .ld = __gcc_qsub(ac,bd) };
 	DD imag = { .ld = __gcc_qadd(ad,bc) };
 	
-	if (isnan(real.s.hi) && isnan(imag.s.hi))
+	if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi))
 	{
 		int recalc = 0;
 		
@@ -45,7 +41,7 @@
 		DD cDD = { .ld = c };
 		DD dDD = { .ld = d };
 		
-		if (isinf(aDD.s.hi) || isinf(bDD.s.hi))
+		if (crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi))
 		{
 			makeFinite(aDD);
 			makeFinite(bDD);
@@ -54,7 +50,7 @@
 			recalc = 1;
 		}
 		
-		if (isinf(cDD.s.hi) || isinf(dDD.s.hi))
+		if (crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi))
 		{
 			makeFinite(cDD);
 			makeFinite(dDD);
@@ -70,7 +66,8 @@
 			DD adDD = { .ld = ad };
 			DD bcDD = { .ld = bc };
 			
-			if (isinf(acDD.s.hi) || isinf(bdDD.s.hi) || isinf(adDD.s.hi) || isinf(bcDD.s.hi))
+			if (crt_isinf(acDD.s.hi) || crt_isinf(bdDD.s.hi) ||
+                            crt_isinf(adDD.s.hi) || crt_isinf(bcDD.s.hi))
 			{
 				zeroNaN(aDD);
 				zeroNaN(bDD);
@@ -82,9 +79,9 @@
 		
 		if (recalc)
 		{
-			real.s.hi = INFINITY * (aDD.s.hi*cDD.s.hi - bDD.s.hi*dDD.s.hi);
+			real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi - bDD.s.hi*dDD.s.hi);
 			real.s.lo = 0.0;
-			imag.s.hi = INFINITY * (aDD.s.hi*dDD.s.hi + bDD.s.hi*cDD.s.hi);
+			imag.s.hi = CRT_INFINITY * (aDD.s.hi*dDD.s.hi + bDD.s.hi*cDD.s.hi);
 			imag.s.lo = 0.0;
 		}
 	}
diff --git a/lib/profile/GCDAProfiling.c b/lib/profile/GCDAProfiling.c
new file mode 100644
index 0000000..fd506e9
--- /dev/null
+++ b/lib/profile/GCDAProfiling.c
@@ -0,0 +1,200 @@
+/*===- GCDAProfiling.c - Support library for GCDA file emission -----------===*\
+|*
+|*                     The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|* 
+|*===----------------------------------------------------------------------===*|
+|* 
+|* This file implements the call back routines for the gcov profiling
+|* instrumentation pass. Link against this library when running code through
+|* the -insert-gcov-profiling LLVM pass.
+|*
+|* We emit files in a corrupt version of GCOV's "gcda" file format. These files
+|* are only close enough that LCOV will happily parse them. Anything that lcov
+|* ignores is missing.
+|*
+|* TODO: gcov is multi-process safe by having each exit open the existing file
+|* and append to it. We'd like to achieve that and be thread-safe too.
+|*
+\*===----------------------------------------------------------------------===*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#ifdef _WIN32
+#include <direct.h>
+#endif
+
+#ifndef _MSC_VER
+#include <stdint.h>
+#else
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;  /* must really be 64 bits: write_int64 shifts by 32 */
+#endif
+
+/* #define DEBUG_GCDAPROFILING */
+
+/*
+ * --- GCOV file format I/O primitives ---
+ */
+
+static FILE *output_file = NULL;
+
+static void write_int32(uint32_t i) {  /* writes the 4 bytes of i as stored in memory */
+  fwrite(&i, 4, 1, output_file);
+}
+
+/* Emit a 64-bit value as two 32-bit words: the low word, then the high word. */
+static void write_int64(uint64_t i) {
+  const uint32_t low_word = i >> 0;
+  const uint32_t high_word = i >> 32;
+
+  write_int32(low_word);
+  write_int32(high_word);
+}
+
+static uint32_t length_of_string(const char *s) {  /* size in 4-byte words of s plus at least one NUL */
+  return (strlen(s) / 4) + 1;
+}
+
+static void write_string(const char *s) {  /* word count, the bytes, then 1-4 NULs of padding */
+  const size_t num_bytes = strlen(s);
+  write_int32(length_of_string(s));
+  fwrite(s, num_bytes, 1, output_file);
+  fwrite("\0\0\0\0", 4 - (num_bytes % 4), 1, output_file);
+}
+
+/* Returns a malloc'ed copy of orig_filename, prefixed with "$GCOV_PREFIX/"
+ * when that environment variable is set. The caller frees the result. */
+static char *mangle_filename(const char *orig_filename) {
+  /* TODO: handle GCOV_PREFIX_STRIP */
+  const char *gcov_prefix = getenv("GCOV_PREFIX");
+  char *mangled;
+
+  if (gcov_prefix == NULL)
+    return strdup(orig_filename);
+
+  /* Room for the prefix, the '/', the original name and the final NUL. */
+  mangled = malloc(strlen(gcov_prefix) + 1 + strlen(orig_filename) + 1);
+  strcpy(mangled, gcov_prefix);
+  strcat(mangled, "/");
+  strcat(mangled, orig_filename);
+  return mangled;
+}
+
+static void recursive_mkdir(const char *filename) {
+  char *pathname;
+  int i, e;
+  /* i < e, not i != e: an empty filename (e == 0) must not run past the string. */
+  for (i = 1, e = strlen(filename); i < e; ++i) {
+    if (filename[i] != '/') continue;
+    pathname = malloc(i + 1);
+    if (!pathname) return;  /* out of memory: stop creating directories */
+    strncpy(pathname, filename, i);
+    pathname[i] = '\0';
+#ifdef _WIN32
+    _mkdir(pathname);
+#else
+    mkdir(pathname, 0750);  /* some of these will fail, ignore it. */
+#endif
+    free(pathname);
+  }
+}
+
+/*
+ * --- LLVM line counter API ---
+ */
+
+/* A file in this case is a translation unit. Each .o file built with line
+ * profiling enabled will emit to a different file. Only one file may be
+ * started at a time. If the file cannot be opened, output_file stays NULL.
+ */
+void llvm_gcda_start_file(const char *orig_filename) {
+  char *filename = mangle_filename(orig_filename);
+  recursive_mkdir(filename);
+  output_file = fopen(filename, "wb");
+  free(filename);  /* the path is only needed for the fopen above */
+  if (!output_file)
+    return;  /* could not create the file: don't fwrite to a NULL stream */
+
+  /* gcda file, version 404*, stamp LLVM. */
+#ifdef __APPLE__
+  fwrite("adcg*204MVLL", 12, 1, output_file);
+#else
+  fwrite("adcg*404MVLL", 12, 1, output_file);
+#endif
+
+#ifdef DEBUG_GCDAPROFILING
+  printf("llvmgcda: [%s]\n", orig_filename);
+#endif
+}
+
+/* Given an array of pointers to counters (counters), increment the n-th one,
+ * where we're also given a pointer to n (predecessor). A predecessor value of
+ * 0xffffffff, or a NULL n-th entry, leaves every counter untouched. */
+void llvm_gcda_increment_indirect_counter(uint32_t *predecessor,
+                                          uint64_t **counters) {
+  uint64_t *counter;
+  uint32_t pred;
+
+  pred = *predecessor;
+  if (pred == 0xffffffff)
+    return;
+  counter = counters[pred];
+
+  /* Don't crash if the pred# is out of sync. This can happen due to threads,
+     or because of a TODO in GCOVProfiling.cpp buildEdgeLookupTable(). */
+  if (counter)
+    ++*counter;
+#ifdef DEBUG_GCDAPROFILING
+  else
+    printf("llvmgcda: increment_indirect_counter counters=%p, pred=%u\n",
+           (void *)counters, *predecessor);
+#endif
+}
+
+void llvm_gcda_emit_function(uint32_t ident, const char *function_name) {
+#ifdef DEBUG_GCDAPROFILING
+  printf("llvmgcda: function id=%x\n", ident);
+#endif
+  if (!output_file) return;  /* no file is open: nothing to write to */
+  /* function tag, then length in words: ident + 2 zero words + name length word + name */
+  fwrite("\0\0\0\1", 4, 1, output_file);
+  write_int32(3 + 1 + length_of_string(function_name));
+  write_int32(ident);
+  write_int32(0);
+  write_int32(0);
+  write_string(function_name);
+}
+
+void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters) {
+  uint32_t i;
+  if (!output_file) return;  /* no file is open: nothing to write to */
+  /* counter #1 (arcs) tag, then the payload length: two words per counter */
+  fwrite("\0\0\xa1\1", 4, 1, output_file);
+  write_int32(num_counters * 2);
+  for (i = 0; i < num_counters; ++i)
+    write_int64(counters[i]);
+
+#ifdef DEBUG_GCDAPROFILING
+  printf("llvmgcda:   %u arcs\n", num_counters);
+  for (i = 0; i < num_counters; ++i) {
+    printf("llvmgcda:   %llu\n", (unsigned long long)counters[i]);
+  }
+#endif
+}
+
+void llvm_gcda_end_file() {
+  if (output_file) {  /* Write out EOF record (skipped when no file is open). */
+    fwrite("\0\0\0\0\0\0\0\0", 8, 1, output_file);
+    fclose(output_file);
+    output_file = NULL;
+  }
+#ifdef DEBUG_GCDAPROFILING
+  printf("llvmgcda: -----\n");
+#endif
+}
diff --git a/lib/profile/Makefile.mk b/lib/profile/Makefile.mk
new file mode 100644
index 0000000..7689c9a
--- /dev/null
+++ b/lib/profile/Makefile.mk
@@ -0,0 +1,18 @@
+#===- lib/profile/Makefile.mk ------------------------------*- Makefile -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := profile
+SubDirs :=
+
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o)
+Implementation := Generic
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard $(Dir)/*.h)
diff --git a/lib/subdf3.c b/lib/subdf3.c
index 825e3c6..5eb1853 100644
--- a/lib/subdf3.c
+++ b/lib/subdf3.c
@@ -11,7 +11,6 @@
 // IEEE-754 default rounding (to nearest, ties to even).
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/subsf3.c b/lib/subsf3.c
index 625376a..351be0e 100644
--- a/lib/subsf3.c
+++ b/lib/subsf3.c
@@ -11,7 +11,6 @@
 // IEEE-754 default rounding (to nearest, ties to even).
 //
 //===----------------------------------------------------------------------===//
-#include "abi.h"
 
 #define SINGLE_PRECISION
 #include "fp_lib.h"
diff --git a/lib/subvdi3.c b/lib/subvdi3.c
index 36b51ad..0f1f924 100644
--- a/lib/subvdi3.c
+++ b/lib/subvdi3.c
@@ -11,10 +11,8 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: a - b */
 
diff --git a/lib/subvsi3.c b/lib/subvsi3.c
index 03983f7..ec4594c 100644
--- a/lib/subvsi3.c
+++ b/lib/subvsi3.c
@@ -11,10 +11,8 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: a - b */
 
diff --git a/lib/subvti3.c b/lib/subvti3.c
index 5d693dc..44127b7 100644
--- a/lib/subvti3.c
+++ b/lib/subvti3.c
@@ -15,7 +15,6 @@
 #if __x86_64
 
 #include "int_lib.h"
-#include <stdlib.h>
 
 /* Returns: a - b */
 
diff --git a/lib/trampoline_setup.c b/lib/trampoline_setup.c
index ced119f..e0765b1 100644
--- a/lib/trampoline_setup.c
+++ b/lib/trampoline_setup.c
@@ -8,9 +8,6 @@
  * ===----------------------------------------------------------------------===
  */
 
-#include <stdint.h>
-#include <stdlib.h>
-
 #include "int_lib.h"
 
 extern void __clear_cache(void* start, void* end);
@@ -23,7 +20,7 @@
  * and then jumps to the target nested function.
  */
 
-#if __ppc__
+#if __ppc__ && !defined(__powerpc64__)
 void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, 
                                 const void* realFunc, void* localsPtr)
 {
@@ -47,4 +44,4 @@
     /* clear instruction cache */
     __clear_cache(trampOnStack, &trampOnStack[10]);
 }
-#endif /* __ppc__ */
+#endif /* __ppc__ && !defined(__powerpc64__) */
diff --git a/lib/truncdfsf2.c b/lib/truncdfsf2.c
index 1dbf02f..f57af7e 100644
--- a/lib/truncdfsf2.c
+++ b/lib/truncdfsf2.c
@@ -37,11 +37,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <stdint.h>
-#include <limits.h>
-#include <stdbool.h>
-
-#include "abi.h"
+#include "int_lib.h"
 
 typedef double src_t;
 typedef uint64_t src_rep_t;
diff --git a/lib/tsan/Makefile.old b/lib/tsan/Makefile.old
new file mode 100644
index 0000000..0b1c458
--- /dev/null
+++ b/lib/tsan/Makefile.old
@@ -0,0 +1,95 @@
+DEBUG=0
+LDFLAGS=-ldl -lpthread -pie
+CXXFLAGS = -fPIE -g -Wall -Werror  -DTSAN_DEBUG=$(DEBUG)
+ifeq ($(DEBUG), 0)
+	CXXFLAGS += -O3
+endif
+
+
+LIBTSAN=rtl/libtsan.a
+GTEST_ROOT=third_party/googletest
+GTEST_INCLUDE=-I$(GTEST_ROOT)/include
+GTEST_BUILD_DIR=$(GTEST_ROOT)/build
+GTEST_LIB=$(GTEST_BUILD_DIR)/gtest-all.o
+
+RTL_TEST_SRC=$(wildcard rtl_tests/*.cc)
+RTL_TEST_OBJ=$(patsubst %.cc,%.o,$(RTL_TEST_SRC))
+UNIT_TEST_SRC=$(wildcard unit_tests/*_test.cc)
+UNIT_TEST_OBJ=$(patsubst %.cc,%.o,$(UNIT_TEST_SRC))
+UNIT_TEST_HDR=$(wildcard rtl/*.h)
+
+INCLUDES=-Irtl $(GTEST_INCLUDE)
+
+all: $(LIBTSAN) test
+
+help:
+	@ echo "A little help is always welcome!"
+	@ echo "The most useful targets are:"
+	@ echo "  make install_deps  # Install third-party dependencies required for building"
+	@ echo "  make presubmit     # Run it every time before committing"
+	@ echo "  make lint          # Run the style checker"
+	@ echo
+	@ echo "For more info, see http://code.google.com/p/data-race-test/wiki/ThreadSanitizer2"
+
+$(LIBTSAN):
+	$(MAKE) -C rtl -f Makefile.old DEBUG=$(DEBUG)
+
+unit_tests/%_test.o: unit_tests/%_test.cc $(UNIT_TEST_HDR)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $@ -c $<
+
+rtl_tests/%.o: rtl_tests/%.cc $(LIBTSAN_HEADERS)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $@ -c $<
+
+#%.o: %.c $(LIBTSAN_HEADERS)
+#	$(CC) $(CXXFLAGS) $(INCLUDES) -c $<
+
+tsan_test: $(TEST_OBJ) $(UNIT_TEST_OBJ) $(RTL_TEST_OBJ) $(LIBTSAN) $(GTEST_LIB)
+	$(CXX) $^ -o $@ $(LDFLAGS)
+
+test: $(LIBTSAN) tsan_test
+
+run: all
+	(ulimit -s 8192; ./tsan_test)
+
+presubmit:
+	$(MAKE) -f Makefile.old lint -j 4
+	# Debug build with clang.
+	$(MAKE) -f Makefile.old clean
+	$(MAKE) -f Makefile.old run DEBUG=1 -j 16 CC=clang CXX=clang++
+	./output_tests/test_output.sh
+	# Release build gcc
+	$(MAKE) -f Makefile.old clean
+	$(MAKE) -f Makefile.old run DEBUG=0 -j 16 CC=gcc CXX=g++
+	./check_analyze.sh
+	./output_tests/test_output.sh
+	@ echo PRESUBMIT PASSED
+
+RTL_LINT_FITLER=-legal/copyright,-build/include,-readability/casting,-build/header_guard
+
+lint: lint_tsan lint_tests
+lint_tsan:  # globs are spelled out: recipes run under /bin/sh, which need not expand {cc,h}
+	third_party/cpplint/cpplint.py --filter=$(RTL_LINT_FITLER) rtl/*.cc rtl/*.h
+lint_tests:
+	third_party/cpplint/cpplint.py --filter=$(RTL_LINT_FITLER) rtl_tests/*.cc rtl_tests/*.h
+
+install_deps:
+	rm -rf third_party
+	mkdir third_party
+	(cd third_party && \
+	svn co -r613 http://googletest.googlecode.com/svn/trunk googletest && \
+	svn co -r82 http://google-styleguide.googlecode.com/svn/trunk/cpplint cpplint \
+        )
+
+# Remove verbose printf from lint. Not strictly necessary.
+hack_cpplint:
+	sed -i "s/  sys.stderr.write('Done processing.*//g" third_party/cpplint/cpplint.py
+
+$(GTEST_LIB):
+	mkdir -p $(GTEST_BUILD_DIR) && \
+	cd $(GTEST_BUILD_DIR) && \
+	$(MAKE) -f ../make/Makefile CXXFLAGS="$(CXXFLAGS)" CFLAGS="$(CFLAGS)" CC=$(CC) CXX=$(CXX)
+
+clean:
+	rm -f asm_*.s libtsan.nm libtsan.objdump */*.o tsan_test
+	rm -rf $(GTEST_BUILD_DIR)
+	$(MAKE) clean -C rtl -f Makefile.old
diff --git a/lib/tsan/analyze_libtsan.sh b/lib/tsan/analyze_libtsan.sh
new file mode 100755
index 0000000..e080561
--- /dev/null
+++ b/lib/tsan/analyze_libtsan.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+set -e
+set -u
+
+get_asm() {
+  grep tsan_$1.: -A 10000 libtsan.objdump | \
+    awk "/[^:]$/ {print;} />:/ {c++; if (c == 2) {exit}}"
+}
+
+list="write1 \
+      write2 \
+      write4 \
+      write8 \
+      read1 \
+      read2 \
+      read4 \
+      read8 \
+      func_entry \
+      func_exit"
+
+BIN="$(dirname "$0")"/tsan_test  # quoted so a checkout path with spaces still works
+objdump -d "$BIN"  > libtsan.objdump
+nm -S "$BIN" | grep "__tsan_" > libtsan.nm
+
+for f in $list; do
+  file=asm_$f.s
+  get_asm $f > $file
+  tot=$(wc -l < $file)
+  size=$(grep $f$ libtsan.nm | awk --non-decimal-data '{print ("0x"$2)+0}')
+  rsp=$(grep '(%rsp)' $file | wc -l)
+  push=$(grep 'push' $file | wc -l)
+  pop=$(grep 'pop' $file | wc -l)
+  call=$(grep 'call' $file | wc -l)
+  load=$(egrep 'mov .*\,.*\(.*\)|cmp .*\,.*\(.*\)' $file | wc -l)
+  store=$(egrep 'mov .*\(.*\),' $file | wc -l)
+  mov=$(grep 'mov' $file | wc -l)
+  lea=$(grep 'lea' $file | wc -l)
+  sh=$(grep 'shr\|shl' $file | wc -l)
+  cmp=$(grep 'cmp\|test' $file | wc -l)
+  printf "%10s tot %3d; size %4d; rsp %d; push %d; pop %d; call %d; load %2d; store %2d; sh %3d; mov %3d; lea %3d; cmp %3d\n" \
+    $f $tot $size $rsp $push $pop $call $load $store $sh $mov $lea $cmp;
+done
diff --git a/lib/tsan/benchmarks/mini_bench_local.cc b/lib/tsan/benchmarks/mini_bench_local.cc
new file mode 100644
index 0000000..accdcb6
--- /dev/null
+++ b/lib/tsan/benchmarks/mini_bench_local.cc
@@ -0,0 +1,49 @@
+// Mini-benchmark for tsan: non-shared memory writes.
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+int len;
+int *a;
+const int kNumIter = 1000;
+
+__attribute__((noinline))
+void Run(int idx) {
+  for (int i = 0, n = len; i < n; i++)
+    a[i + idx * n] = i;
+}
+
+void *Thread(void *arg) {
+  long idx = (long)arg;
+  printf("Thread %ld started\n", idx);
+  for (int i = 0; i < kNumIter; i++)
+    Run(idx);
+  printf("Thread %ld done\n", idx);
+  return 0;
+}
+
+int main(int argc, char **argv) {  // args: n_threads len; defaults (4, 1000000) unless exactly two are given
+  int n_threads = 0;
+  if (argc != 3) {
+    n_threads = 4;
+    len = 1000000;
+  } else {
+    n_threads = atoi(argv[1]);
+    assert(n_threads > 0 && n_threads <= 32);
+    len = atoi(argv[2]);
+  }
+  printf("%s: n_threads=%d len=%d iter=%d\n",
+         __FILE__, n_threads, len, kNumIter);
+  a = new int[n_threads * len];  // one private slice of len ints per thread
+  pthread_t *t = new pthread_t[n_threads];
+  for (int i = 0; i < n_threads; i++) {
+    pthread_create(&t[i], 0, Thread, (void*)(long)i);  // widen first: Thread() reads it back as long
+  }
+  for (int i = 0; i < n_threads; i++) {
+    pthread_join(t[i], 0);
+  }
+  delete [] t;
+  delete [] a;
+  return 0;
+}
diff --git a/lib/tsan/benchmarks/mini_bench_shared.cc b/lib/tsan/benchmarks/mini_bench_shared.cc
new file mode 100644
index 0000000..f9b9f42
--- /dev/null
+++ b/lib/tsan/benchmarks/mini_bench_shared.cc
@@ -0,0 +1,51 @@
+// Mini-benchmark for tsan: shared memory reads.
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+int len;
+int *a;
+const int kNumIter = 1000;
+
+__attribute__((noinline))
+void Run(int idx) {
+  for (int i = 0, n = len; i < n; i++)
+    if (a[i] != i) abort();
+}
+
+void *Thread(void *arg) {
+  long idx = (long)arg;
+  printf("Thread %ld started\n", idx);
+  for (int i = 0; i < kNumIter; i++)
+    Run(idx);
+  printf("Thread %ld done\n", idx);
+  return 0;
+}
+
+int main(int argc, char **argv) {  // args: n_threads len; defaults (4, 1000000) unless exactly two are given
+  int n_threads = 0;
+  if (argc != 3) {
+    n_threads = 4;
+    len = 1000000;
+  } else {
+    n_threads = atoi(argv[1]);
+    assert(n_threads > 0 && n_threads <= 32);
+    len = atoi(argv[2]);
+  }
+  printf("%s: n_threads=%d len=%d iter=%d\n",
+         __FILE__, n_threads, len, kNumIter);
+  a = new int[len];  // a single array that every thread reads
+  for (int i = 0, n = len; i < n; i++)
+    a[i] = i;
+  pthread_t *t = new pthread_t[n_threads];
+  for (int i = 0; i < n_threads; i++) {
+    pthread_create(&t[i], 0, Thread, (void*)(long)i);  // widen first: Thread() reads it back as long
+  }
+  for (int i = 0; i < n_threads; i++) {
+    pthread_join(t[i], 0);
+  }
+  delete [] t;
+  delete [] a;
+  return 0;
+}
diff --git a/lib/tsan/benchmarks/start_many_threads.cc b/lib/tsan/benchmarks/start_many_threads.cc
new file mode 100644
index 0000000..1e86fa6
--- /dev/null
+++ b/lib/tsan/benchmarks/start_many_threads.cc
@@ -0,0 +1,52 @@
+// Mini-benchmark for creating a lot of threads.
+//
+// Some facts:
+// a) clang -O1 takes <15ms to start N=500 threads,
+//    consuming ~4MB more RAM than N=1.
+// b) clang -O1 -ftsan takes ~26s to start N=500 threads,
+//    eats 5GB more RAM than N=1 (which is somewhat expected but still a lot)
+//    but then it consumes ~4GB of extra memory when the threads shut down!
+//        (definitely not in the barrier_wait interceptor)
+//    Also, it takes 26s to run with N=500 vs just 1.1s to run with N=1.
+#include <assert.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+pthread_barrier_t all_threads_ready;
+
+void* Thread(void *unused) {
+  pthread_barrier_wait(&all_threads_ready);
+  return 0;
+}
+
+// Starts n_threads threads (default 100) parked on one barrier, then joins them.
+int main(int argc, char **argv) {
+  int n_threads;
+  if (argc == 1) {
+    n_threads = 100;
+  } else if (argc == 2) {
+    n_threads = atoi(argv[1]);
+  } else {
+    printf("Usage: %s n_threads\n", argv[0]);
+    return 1;
+  }
+  assert(n_threads >= 0);  // A negative count would reach new[] below.
+  printf("%s: n_threads=%d\n", __FILE__, n_threads);
+
+  pthread_barrier_init(&all_threads_ready, NULL, n_threads + 1);
+
+  pthread_t *t = new pthread_t[n_threads];
+  for (int i = 0; i < n_threads; i++) {
+    int status = pthread_create(&t[i], 0, Thread, (void*)(long)i);  // Widen via long.
+    assert(status == 0);
+  }
+  // sleep(5);  // FIXME: simplify measuring the memory usage.
+  pthread_barrier_wait(&all_threads_ready);
+  for (int i = 0; i < n_threads; i++)
+    pthread_join(t[i], 0);
+  // sleep(5);  // FIXME: simplify measuring the memory usage.
+  delete [] t;
+  return 0;
+}
diff --git a/lib/tsan/benchmarks/vts_many_threads_bench.cc b/lib/tsan/benchmarks/vts_many_threads_bench.cc
new file mode 100644
index 0000000..f1056e2
--- /dev/null
+++ b/lib/tsan/benchmarks/vts_many_threads_bench.cc
@@ -0,0 +1,120 @@
+// Mini-benchmark for tsan VTS worst case performance
+// Idea:
+// 1) Spawn M + N threads (M >> N)
+//    We'll call the M threads 'garbage threads'.
+// 2) Make sure all threads have been created, so that no TIDs are reused
+// 3) Join the garbage threads
+// 4) Do many sync operations on the remaining N threads
+//
+// It turns out that due to O(M+N) VTS complexity, step (4) is much slower
+// when M is large.
+//
+// Some numbers:
+// a) clang++ native O1 with n_iterations=200kk takes
+//      5s regardless of M
+//    clang++ tsanv2 O1 with n_iterations=20kk takes
+//      23.5s with M=200
+//      11.5s with M=1
+//    i.e. tsanv2 is ~23x to ~47x slower than native, depends on M.
+// b) g++ native O1 with n_iterations=200kk takes
+//      5.5s regardless of M
+//    g++ tsanv1 O1 with n_iterations=2kk takes
+//      39.5s with M=200
+//      20.5s with M=1
+//    i.e. tsanv1 is ~370x to ~720x slower than native, depends on M.
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+class __attribute__((aligned(64))) Mutex {  // Thin pthread_mutex_t wrapper, 64-byte aligned.
+ public:
+  Mutex()  { pthread_mutex_init(&m_, NULL); }  // NULL attr: default mutex attributes.
+  ~Mutex() { pthread_mutex_destroy(&m_); }
+  void Lock() { pthread_mutex_lock(&m_); }  // Return code is not checked.
+  void Unlock() { pthread_mutex_unlock(&m_); }  // Return code is not checked.
+
+ private:
+  pthread_mutex_t m_;  // The wrapped mutex; initialized/destroyed by ctor/dtor.
+};
+
+const int kNumMutexes = 1024;
+Mutex mutexes[kNumMutexes];
+
+int n_threads, n_iterations;
+
+pthread_barrier_t all_threads_ready, main_threads_ready;
+
+void* GarbageThread(void *unused) {
+  pthread_barrier_wait(&all_threads_ready);
+  return 0;
+}
+
+void *Thread(void *arg) {  // Worker body; arg carries the worker index.
+  long idx = (long)arg;
+  pthread_barrier_wait(&all_threads_ready);  // Rendezvous with every spawned thread and main.
+
+  // Wait for the main thread to join the garbage threads.
+  pthread_barrier_wait(&main_threads_ready);
+
+  printf("Thread %ld go!\n", idx);
+  int offset = idx * kNumMutexes / n_threads;  // Per-worker starting index into mutexes[].
+  for (int i = 0; i < n_iterations; i++) {
+    mutexes[(offset + i) % kNumMutexes].Lock();  // Walk the array cyclically from offset.
+    mutexes[(offset + i) % kNumMutexes].Unlock();  // Same mutex as the Lock() just above.
+  }
+  printf("Thread %ld done\n", idx);
+  return 0;
+}
+
+// Spawns n_garbage_threads short-lived threads plus n_threads workers, joins
+// the garbage threads once every thread has started, then releases the workers.
+int main(int argc, char **argv) {
+  int n_garbage_threads;
+  if (argc == 1) {
+    n_threads = 2;
+    n_garbage_threads = 200;
+    n_iterations = 20000000;
+  } else if (argc == 4) {
+    n_threads = atoi(argv[1]);
+    assert(n_threads > 0 && n_threads <= 32);
+    n_garbage_threads = atoi(argv[2]);
+    assert(n_garbage_threads > 0 && n_garbage_threads <= 16000);
+    n_iterations = atoi(argv[3]);
+  } else {
+    printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv[0]);
+    return 1;
+  }
+  printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n",
+         __FILE__, n_threads, n_garbage_threads, n_iterations);
+
+  pthread_barrier_init(&all_threads_ready, NULL, n_garbage_threads + n_threads + 1);
+  pthread_barrier_init(&main_threads_ready, NULL, n_threads + 1);
+
+  pthread_t *t = new pthread_t[n_threads];
+  {
+    pthread_t *g_t = new pthread_t[n_garbage_threads];
+    for (int i = 0; i < n_garbage_threads; i++) {
+      int status = pthread_create(&g_t[i], 0, GarbageThread, NULL);
+      assert(status == 0);
+    }
+    for (int i = 0; i < n_threads; i++) {
+      // Widen via long before the pointer cast; Thread() reads it back as long.
+      int status = pthread_create(&t[i], 0, Thread, (void*)(long)i);
+      assert(status == 0);
+    }
+    pthread_barrier_wait(&all_threads_ready);
+    printf("All threads started! Killing the garbage threads.\n");
+    for (int i = 0; i < n_garbage_threads; i++)
+      pthread_join(g_t[i], 0);
+    delete [] g_t;
+  }
+  printf("Resuming the main threads.\n");
+  pthread_barrier_wait(&main_threads_ready);
+
+  for (int i = 0; i < n_threads; i++)
+    pthread_join(t[i], 0);
+  delete [] t;
+  return 0;
+}
diff --git a/lib/tsan/check_analyze.sh b/lib/tsan/check_analyze.sh
new file mode 100755
index 0000000..39d570b
--- /dev/null
+++ b/lib/tsan/check_analyze.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+set -u
+
+RES=$(./analyze_libtsan.sh)
+PrintRes() {
+  printf "%s\n" "$RES"
+}
+
+PrintRes
+
+mops="write1 \
+      write2 \
+      write4 \
+      write8 \
+      read1 \
+      read2 \
+      read4 \
+      read8"
+func="func_entry \
+      func_exit"
+
+# check NAME INSN COUNT: exit 1 unless $RES has a line matching "NAME .* INSN COUNT; ".
+check() {
+  if ! PrintRes | grep -E "$1 .* $2 $3; " >/dev/null; then
+    echo "FAILED $1 must contain $2 $3"
+    exit 1
+  fi
+}
+
+for f in $mops; do
+  check $f rsp 1   # To read caller pc.
+  check $f push 0
+  check $f pop 0
+done
+
+for f in $func; do
+  check $f rsp 0
+  check $f push 0
+  check $f pop 0
+  check $f call 1  # TraceSwitch()
+done
+
+echo LGTM
diff --git a/lib/tsan/output_tests/free_race.c b/lib/tsan/output_tests/free_race.c
new file mode 100644
index 0000000..24f2120
--- /dev/null
+++ b/lib/tsan/output_tests/free_race.c
@@ -0,0 +1,36 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <unistd.h>
+
+int *mem;
+pthread_mutex_t mtx;
+
+void *Thread1(void *x) {
+  pthread_mutex_lock(&mtx);
+  free(mem);
+  pthread_mutex_unlock(&mtx);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  usleep(1000000);
+  pthread_mutex_lock(&mtx);
+  mem[0] = 42;
+  pthread_mutex_unlock(&mtx);
+  return NULL;
+}
+
+int main() {
+  mem = (int*)malloc(100);
+  pthread_mutex_init(&mtx, 0);
+  pthread_t t;
+  pthread_create(&t, NULL, Thread1, NULL);
+  Thread2(0);
+  pthread_join(t, NULL);
+  pthread_mutex_destroy(&mtx);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/heap_race.cc b/lib/tsan/output_tests/heap_race.cc
new file mode 100644
index 0000000..e92bb37
--- /dev/null
+++ b/lib/tsan/output_tests/heap_race.cc
@@ -0,0 +1,19 @@
+#include <pthread.h>
+#include <stdio.h>
+#include <stddef.h>
+
+void *Thread(void *a) {
+  ((int*)a)[0]++;
+  return NULL;
+}
+
+int main() {
+  int *p = new int(42);
+  pthread_t t;
+  pthread_create(&t, NULL, Thread, p);
+  p[0]++;
+  pthread_join(t, NULL);
+  delete p;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/memcpy_race.cc b/lib/tsan/output_tests/memcpy_race.cc
new file mode 100644
index 0000000..c6b79a7
--- /dev/null
+++ b/lib/tsan/output_tests/memcpy_race.cc
@@ -0,0 +1,40 @@
+#include <pthread.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+char *data = new char[10];
+char *data1 = new char[10];
+char *data2 = new char[10];
+
+void *Thread1(void *x) {
+  memcpy(data+5, data1, 1);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  usleep(500*1000);
+  memcpy(data+3, data2, 4);
+  return NULL;
+}
+
+int main() {
+  fprintf(stderr, "addr=%p\n", &data[5]);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: addr=[[ADDR:0x[0-9,a-f]+]]
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 1 at [[ADDR]] by thread 2:
+// CHECK:     #0 memcpy
+// CHECK:     #1 Thread2
+// CHECK:   Previous write of size 1 at [[ADDR]] by thread 1:
+// CHECK:     #0 memcpy
+// CHECK:     #1 Thread1
+
diff --git a/lib/tsan/output_tests/mop_with_offset.cc b/lib/tsan/output_tests/mop_with_offset.cc
new file mode 100644
index 0000000..c785de3
--- /dev/null
+++ b/lib/tsan/output_tests/mop_with_offset.cc
@@ -0,0 +1,35 @@
+#include <pthread.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+
+void *Thread1(void *x) {
+  int *p = (int*)x;
+  p[0] = 1;
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  usleep(500*1000);
+  char *p = (char*)x;
+  p[2] = 1;
+  return NULL;
+}
+
+int main() {
+  int data = 42;
+  fprintf(stderr, "ptr1=%p\n", &data);
+  fprintf(stderr, "ptr2=%p\n", (char*)&data + 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, &data);
+  pthread_create(&t[1], NULL, Thread2, &data);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: ptr1=[[PTR1:0x[0-9,a-f]+]]
+// CHECK: ptr2=[[PTR2:0x[0-9,a-f]+]]
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 1 at [[PTR2]] by thread 2:
+// CHECK:   Previous write of size 4 at [[PTR1]] by thread 1:
+
diff --git a/lib/tsan/output_tests/mop_with_offset2.cc b/lib/tsan/output_tests/mop_with_offset2.cc
new file mode 100644
index 0000000..8c0ec07
--- /dev/null
+++ b/lib/tsan/output_tests/mop_with_offset2.cc
@@ -0,0 +1,35 @@
+#include <pthread.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+
+void *Thread1(void *x) {
+  usleep(500*1000);
+  int *p = (int*)x;
+  p[0] = 1;
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  char *p = (char*)x;
+  p[2] = 1;
+  return NULL;
+}
+
+int main() {
+  int data = 42;
+  fprintf(stderr, "ptr1=%p\n", &data);
+  fprintf(stderr, "ptr2=%p\n", (char*)&data + 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, &data);
+  pthread_create(&t[1], NULL, Thread2, &data);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: ptr1=[[PTR1:0x[0-9,a-f]+]]
+// CHECK: ptr2=[[PTR2:0x[0-9,a-f]+]]
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 4 at [[PTR1]] by thread 1:
+// CHECK:   Previous write of size 1 at [[PTR2]] by thread 2:
+
diff --git a/lib/tsan/output_tests/race_on_barrier.c b/lib/tsan/output_tests/race_on_barrier.c
new file mode 100644
index 0000000..98d7a1d
--- /dev/null
+++ b/lib/tsan/output_tests/race_on_barrier.c
@@ -0,0 +1,31 @@
+#include <pthread.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <unistd.h>
+
+pthread_barrier_t B;
+int Global;
+
+void *Thread1(void *x) {
+  pthread_barrier_init(&B, 0, 2);
+  pthread_barrier_wait(&B);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  usleep(1000000);
+  pthread_barrier_wait(&B);
+  return NULL;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, NULL, Thread1, NULL);
+  Thread2(0);
+  pthread_join(t, NULL);
+  pthread_barrier_destroy(&B);
+  return 0;
+}
+
+// CHECK:      WARNING: ThreadSanitizer: data race
+
diff --git a/lib/tsan/output_tests/race_on_barrier2.c b/lib/tsan/output_tests/race_on_barrier2.c
new file mode 100644
index 0000000..dbdb6b5
--- /dev/null
+++ b/lib/tsan/output_tests/race_on_barrier2.c
@@ -0,0 +1,30 @@
+#include <pthread.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <unistd.h>
+
+pthread_barrier_t B;
+int Global;
+
+void *Thread1(void *x) {
+  if (pthread_barrier_wait(&B) == PTHREAD_BARRIER_SERIAL_THREAD)
+    pthread_barrier_destroy(&B);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  if (pthread_barrier_wait(&B) == PTHREAD_BARRIER_SERIAL_THREAD)
+    pthread_barrier_destroy(&B);
+  return NULL;
+}
+
+int main() {
+  pthread_barrier_init(&B, 0, 2);
+  pthread_t t;
+  pthread_create(&t, NULL, Thread1, NULL);
+  Thread2(0);
+  pthread_join(t, NULL);
+  return 0;
+}
+
+// CHECK:      WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/race_on_mutex.c b/lib/tsan/output_tests/race_on_mutex.c
new file mode 100644
index 0000000..90c32ba
--- /dev/null
+++ b/lib/tsan/output_tests/race_on_mutex.c
@@ -0,0 +1,42 @@
+#include <pthread.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <unistd.h>
+
+pthread_mutex_t Mtx;
+int Global;
+
+void *Thread1(void *x) {
+  pthread_mutex_init(&Mtx, 0);
+  pthread_mutex_lock(&Mtx);
+  Global = 42;
+  pthread_mutex_unlock(&Mtx);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  usleep(1000000);
+  pthread_mutex_lock(&Mtx);
+  Global = 43;
+  pthread_mutex_unlock(&Mtx);
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  pthread_mutex_destroy(&Mtx);
+  return 0;
+}
+
+// CHECK:      WARNING: ThreadSanitizer: data race
+// CHECK-NEXT:   Read of size 1 at {{.*}} by thread 2:
+// CHECK-NEXT:     #0 pthread_mutex_lock {{.*}} ({{.*}})
+// CHECK-NEXT:     #1 Thread2 {{.*}}race_on_mutex.c:19 ({{.*}})
+// CHECK-NEXT:   Previous write of size 1 at {{.*}} by thread 1:
+// CHECK-NEXT:     #0 pthread_mutex_init {{.*}} ({{.*}})
+// CHECK-NEXT:     #1 Thread1 {{.*}}race_on_mutex.c:10 ({{.*}})
+
diff --git a/lib/tsan/output_tests/race_with_finished_thread.cc b/lib/tsan/output_tests/race_with_finished_thread.cc
new file mode 100644
index 0000000..1f60f4b
--- /dev/null
+++ b/lib/tsan/output_tests/race_with_finished_thread.cc
@@ -0,0 +1,43 @@
+#include <pthread.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+// Ensure that we can restore a stack of a finished thread.
+
+int g_data;
+
+void __attribute__((noinline)) foobar(int *p) {
+  *p = 42;
+}
+
+void *Thread1(void *x) {
+  foobar(&g_data);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  usleep(1000*1000);
+  g_data = 43;
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 4 at {{.*}} by thread 2:
+// CHECK:   Previous write of size 4 at {{.*}} by thread 1:
+// CHECK:     #0 foobar
+// CHECK:     #1 Thread1
+// CHECK:   Thread 1 (finished) created at:
+// CHECK:     #0 pthread_create
+// CHECK:     #1 main
+
diff --git a/lib/tsan/output_tests/simple_race.c b/lib/tsan/output_tests/simple_race.c
new file mode 100644
index 0000000..ed831fd
--- /dev/null
+++ b/lib/tsan/output_tests/simple_race.c
@@ -0,0 +1,25 @@
+#include <pthread.h>
+#include <stdio.h>
+
+int Global;
+
+void *Thread1(void *x) {
+  Global = 42;
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  Global = 43;
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/simple_race.cc b/lib/tsan/output_tests/simple_race.cc
new file mode 100644
index 0000000..8d2cabf
--- /dev/null
+++ b/lib/tsan/output_tests/simple_race.cc
@@ -0,0 +1,24 @@
+#include <pthread.h>
+#include <stdio.h>
+
+int Global;
+
+void *Thread1(void *x) {
+  Global++;
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  Global--;
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/simple_stack.c b/lib/tsan/output_tests/simple_stack.c
new file mode 100644
index 0000000..ade99da
--- /dev/null
+++ b/lib/tsan/output_tests/simple_stack.c
@@ -0,0 +1,66 @@
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+
+void __attribute__((noinline)) foo1() {
+  Global = 42;
+}
+
+void __attribute__((noinline)) bar1() {
+  volatile int tmp = 42; (void)tmp;
+  foo1();
+}
+
+void __attribute__((noinline)) foo2() {
+  volatile int v = Global; (void)v;
+}
+
+void __attribute__((noinline)) bar2() {
+  volatile int tmp = 42; (void)tmp;
+  foo2();
+}
+
+void *Thread1(void *x) {
+  usleep(1000000);
+  bar1();
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  bar2();
+  return NULL;
+}
+
+void StartThread(pthread_t *t, void *(*f)(void*)) {
+  pthread_create(t, NULL, f, NULL);
+}
+
+int main() {
+  pthread_t t[2];
+  StartThread(&t[0], Thread1);
+  StartThread(&t[1], Thread2);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK:      WARNING: ThreadSanitizer: data race
+// CHECK-NEXT:   Write of size 4 at {{.*}} by thread 1:
+// CHECK-NEXT:     #0 foo1 {{.*}}simple_stack.c:8 ({{.*}})
+// CHECK-NEXT:     #1 bar1 {{.*}}simple_stack.c:13 ({{.*}})
+// CHECK-NEXT:     #2 Thread1 {{.*}}simple_stack.c:27 ({{.*}})
+// CHECK-NEXT:   Previous read of size 4 at {{.*}} by thread 2:
+// CHECK-NEXT:     #0 foo2 {{.*}}simple_stack.c:17 ({{.*}})
+// CHECK-NEXT:     #1 bar2 {{.*}}simple_stack.c:22 ({{.*}})
+// CHECK-NEXT:     #2 Thread2 {{.*}}simple_stack.c:32 ({{.*}})
+// CHECK-NEXT:   Thread 1 (running) created at:
+// CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
+// CHECK-NEXT:     #1 StartThread {{.*}}simple_stack.c:37 ({{.*}})
+// CHECK-NEXT:     #2 main {{.*}}simple_stack.c:42 ({{.*}})
+// CHECK-NEXT:   Thread 2 ({{.*}}) created at:
+// CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
+// CHECK-NEXT:     #1 StartThread {{.*}}simple_stack.c:37 ({{.*}})
+// CHECK-NEXT:     #2 main {{.*}}simple_stack.c:43 ({{.*}})
+
diff --git a/lib/tsan/output_tests/simple_stack2.cc b/lib/tsan/output_tests/simple_stack2.cc
new file mode 100644
index 0000000..d3b03ae
--- /dev/null
+++ b/lib/tsan/output_tests/simple_stack2.cc
@@ -0,0 +1,48 @@
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+
+void __attribute__((noinline)) foo1() {
+  Global = 42;
+}
+
+void __attribute__((noinline)) bar1() {
+  volatile int tmp = 42; (void)tmp;
+  foo1();
+}
+
+void __attribute__((noinline)) foo2() {
+  volatile int v = Global; (void)v;
+}
+
+void __attribute__((noinline)) bar2() {
+  volatile int tmp = 42; (void)tmp;
+  foo2();
+}
+
+void *Thread1(void *x) {
+  usleep(1000000);
+  bar1();
+  return NULL;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, NULL, Thread1, NULL);
+  bar2();
+  pthread_join(t, NULL);
+}
+
+// CHECK:      WARNING: ThreadSanitizer: data race
+// CHECK-NEXT:   Write of size 4 at {{.*}} by thread 1:
+// CHECK-NEXT:     #0 foo1() {{.*}}simple_stack2.cc:8 ({{.*}})
+// CHECK-NEXT:     #1 bar1() {{.*}}simple_stack2.cc:13 ({{.*}})
+// CHECK-NEXT:     #2 Thread1(void*) {{.*}}simple_stack2.cc:27 ({{.*}})
+// CHECK-NEXT:   Previous read of size 4 at {{.*}} by main thread:
+// CHECK-NEXT:     #0 foo2() {{.*}}simple_stack2.cc:17 ({{.*}})
+// CHECK-NEXT:     #1 bar2() {{.*}}simple_stack2.cc:22 ({{.*}})
+// CHECK-NEXT:     #2 main {{.*}}simple_stack2.cc:34 ({{.*}})
+
+
diff --git a/lib/tsan/output_tests/static_init1.cc b/lib/tsan/output_tests/static_init1.cc
new file mode 100644
index 0000000..75d2819
--- /dev/null
+++ b/lib/tsan/output_tests/static_init1.cc
@@ -0,0 +1,25 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+struct P {
+  int x;
+  int y;
+};
+
+void *Thread(void *x) {
+  static P p = {rand(), rand()};
+  if (p.x > RAND_MAX || p.y > RAND_MAX)
+    exit(1);
+  return 0;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], 0, Thread, 0);
+  pthread_create(&t[1], 0, Thread, 0);
+  pthread_join(t[0], 0);
+  pthread_join(t[1], 0);
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/static_init2.cc b/lib/tsan/output_tests/static_init2.cc
new file mode 100644
index 0000000..f6e9596
--- /dev/null
+++ b/lib/tsan/output_tests/static_init2.cc
@@ -0,0 +1,31 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+struct Cache {
+  int x;
+  Cache(int x)
+    : x(x) {
+  }
+};
+
+void foo(Cache *my) {
+  static Cache *c = my ? my : new Cache(rand());
+  if (c->x >= RAND_MAX)
+    exit(1);
+}
+
+void *Thread(void *x) {
+  foo(new Cache(rand()));
+  return 0;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], 0, Thread, 0);
+  pthread_create(&t[1], 0, Thread, 0);
+  pthread_join(t[0], 0);
+  pthread_join(t[1], 0);
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/static_init3.cc b/lib/tsan/output_tests/static_init3.cc
new file mode 100644
index 0000000..718f811
--- /dev/null
+++ b/lib/tsan/output_tests/static_init3.cc
@@ -0,0 +1,46 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sched.h>
+
+struct Cache {
+  int x;
+};
+
+Cache g_cache;
+
+Cache *CreateCache() {
+  g_cache.x = rand();
+  return &g_cache;
+}
+
+_Atomic(Cache*) queue;
+
+void *Thread1(void *x) {  // Producer: builds the cache once, then publishes its address.
+  static Cache *c = CreateCache();  // CreateCache() writes g_cache.x and returns &g_cache.
+  __c11_atomic_store(&queue, c, 0);  // Order 0 is memory_order_relaxed in clang's numbering.
+  return 0;
+}
+
+void *Thread2(void *x) {  // Consumer: polls queue until a Cache pointer appears.
+  Cache *c = 0;
+  for (;;) {
+    c = __c11_atomic_load(&queue, 0);  // Order 0, the same value Thread1 stores with.
+    if (c)
+      break;
+    sched_yield();  // Give up the CPU between polls.
+  }
+  if (c->x >= RAND_MAX)  // Plain read of the field CreateCache() wrote.
+    exit(1);
+  return 0;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], 0, Thread1, 0);
+  pthread_create(&t[1], 0, Thread2, 0);
+  pthread_join(t[0], 0);
+  pthread_join(t[1], 0);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/static_init4.cc b/lib/tsan/output_tests/static_init4.cc
new file mode 100644
index 0000000..cdacbce
--- /dev/null
+++ b/lib/tsan/output_tests/static_init4.cc
@@ -0,0 +1,35 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sched.h>
+
+struct Cache {
+  int x;
+  Cache(int x)
+    : x(x) {
+  }
+};
+
+int g_other;
+
+Cache *CreateCache() {
+  g_other = rand();
+  return new Cache(rand());
+}
+
+void *Thread1(void *x) {
+  static Cache *c = CreateCache();
+  if (c->x == g_other)
+    exit(1);
+  return 0;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], 0, Thread1, 0);
+  pthread_create(&t[1], 0, Thread1, 0);
+  pthread_join(t[0], 0);
+  pthread_join(t[1], 0);
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/static_init5.cc b/lib/tsan/output_tests/static_init5.cc
new file mode 100644
index 0000000..4b050c9
--- /dev/null
+++ b/lib/tsan/output_tests/static_init5.cc
@@ -0,0 +1,40 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sched.h>
+
+struct Cache {
+  int x;
+  Cache(int x)
+    : x(x) {
+  }
+};
+
+void *AsyncInit(void *p) {
+  return new Cache((int)(long)p);
+}
+
+Cache *CreateCache() {  // Builds the Cache on a helper thread, then joins it.
+  pthread_t t;
+  pthread_create(&t, 0, AsyncInit, (void*)(long)rand());  // Widen seed via long.
+  void *res;  // Receives AsyncInit's return value from pthread_join.
+  pthread_join(t, &res);
+  return (Cache*)res;
+}
+
+void *Thread1(void *x) {
+  static Cache *c = CreateCache();
+  if (c->x >= RAND_MAX)
+    exit(1);
+  return 0;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], 0, Thread1, 0);
+  pthread_create(&t[1], 0, Thread1, 0);
+  pthread_join(t[0], 0);
+  pthread_join(t[1], 0);
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/suppress_same_address.cc b/lib/tsan/output_tests/suppress_same_address.cc
new file mode 100644
index 0000000..6e98970
--- /dev/null
+++ b/lib/tsan/output_tests/suppress_same_address.cc
@@ -0,0 +1,27 @@
+#include <pthread.h>
+
+int X;
+
+void *Thread1(void *x) {
+  X = 42;
+  X = 66;
+  X = 78;
+  return 0;
+}
+
+void *Thread2(void *x) {
+  X = 11;
+  X = 99;
+  X = 73;
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread1, 0);
+  Thread2(0);
+  pthread_join(t, 0);
+}
+
+// CHECK: ThreadSanitizer: reported 1 warnings
+
diff --git a/lib/tsan/output_tests/suppress_same_stacks.cc b/lib/tsan/output_tests/suppress_same_stacks.cc
new file mode 100644
index 0000000..6046a4e
--- /dev/null
+++ b/lib/tsan/output_tests/suppress_same_stacks.cc
@@ -0,0 +1,27 @@
+#include <pthread.h>
+
+volatile int N;  // Prevent loop unrolling.
+int **data;
+
+void *Thread1(void *x) {
+  for (int i = 0; i < N; i++)
+    data[i][0] = 42;
+  return 0;
+}
+
+int main() {
+  N = 4;
+  data = new int*[N];
+  for (int i = 0; i < N; i++)
+    data[i] = new int;
+  pthread_t t;
+  pthread_create(&t, 0, Thread1, 0);
+  Thread1(0);
+  pthread_join(t, 0);
+  for (int i = 0; i < N; i++)
+    delete data[i];
+  delete[] data;
+}
+
+// CHECK: ThreadSanitizer: reported 1 warnings
+
diff --git a/lib/tsan/output_tests/suppress_sequence.cc b/lib/tsan/output_tests/suppress_sequence.cc
new file mode 100644
index 0000000..1ce0207
--- /dev/null
+++ b/lib/tsan/output_tests/suppress_sequence.cc
@@ -0,0 +1,26 @@
+#include <pthread.h>
+#include <unistd.h>
+
+volatile int g_data1;
+volatile int g_data2;
+volatile int g_data3;
+volatile int g_data4;
+
+void *Thread1(void *x) {
+  if (x)
+    usleep(1000000);
+  g_data1 = 42;
+  g_data2 = 43;
+  g_data3 = 44;
+  g_data4 = 45;
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread1, (void*)1);
+  Thread1(0);
+  pthread_join(t, 0);
+}
+
+// CHECK: ThreadSanitizer: reported 1 warnings
diff --git a/lib/tsan/output_tests/test_output.sh b/lib/tsan/output_tests/test_output.sh
new file mode 100755
index 0000000..a1edcdd
--- /dev/null
+++ b/lib/tsan/output_tests/test_output.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+ulimit -s 8192;
+set -e # fail on any error
+
+ROOTDIR=`dirname $0`/..
+
+# Assuming clang is in path.
+CC=clang
+CXX=clang++
+
+# TODO: add testing for all of -O0...-O3
+CFLAGS="-fthread-sanitizer -fPIE -O1 -g -fno-builtin -Wall -Werror=return-type"
+LDFLAGS="-pie -lpthread -ldl $ROOTDIR/rtl/libtsan.a"
+if [ "$LLDB" != "" ]; then
+  LDFLAGS+=" -L$LLDB -llldb"
+fi
+
+strip() {
+  grep -v "$1" test.out > test.out2
+  mv -f test.out2 test.out
+}
+
+test_file() {
+  SRC=$1
+  COMPILER=$2
+  echo ----- TESTING $1
+  OBJ=$SRC.o
+  EXE=$SRC.exe
+  $COMPILER $SRC $CFLAGS -c -o $OBJ
+  # Link with CXX, because lldb and suppressions require C++.
+  $CXX $OBJ $LDFLAGS -o $EXE
+  LD_LIBRARY_PATH=$LLDB TSAN_OPTIONS="atexit_sleep_ms=0" $EXE 2> test.out || echo -n
+  if [ "$3" != "" ]; then
+    cat test.out
+  fi
+  echo >>test.out  # FileCheck fails on empty files
+  FileCheck < test.out $SRC
+  if [ "$3" == "" ]; then
+    rm -f $EXE $OBJ test.out *.tmp *.tmp2
+  fi
+}
+
+if [ "$1" == "" ]; then
+  for c in $ROOTDIR/output_tests/*.c; do
+    if [[ $c == */failing_* ]]; then
+      echo SKIPPING FAILING TEST $c
+      continue
+    fi
+    test_file $c $CC
+  done
+  for c in $ROOTDIR/output_tests/*.cc; do
+    if [[ $c == */failing_* ]]; then
+      echo SKIPPING FAILING TEST $c
+      continue
+    fi
+    test_file $c $CXX
+  done
+else
+  test_file $ROOTDIR/output_tests/$1 $CXX "DUMP"
+fi
diff --git a/lib/tsan/output_tests/thread_leak.c b/lib/tsan/output_tests/thread_leak.c
new file mode 100644
index 0000000..88a11be
--- /dev/null
+++ b/lib/tsan/output_tests/thread_leak.c
@@ -0,0 +1,15 @@
+#include <pthread.h>
+
+void *Thread(void *x) {
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  pthread_join(t, 0);
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: thread leak
+
diff --git a/lib/tsan/output_tests/thread_leak2.c b/lib/tsan/output_tests/thread_leak2.c
new file mode 100644
index 0000000..71e9c50
--- /dev/null
+++ b/lib/tsan/output_tests/thread_leak2.c
@@ -0,0 +1,15 @@
+#include <pthread.h>
+
+void *Thread(void *x) {
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  pthread_detach(t);
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: thread leak
+
diff --git a/lib/tsan/output_tests/thread_leak3.c b/lib/tsan/output_tests/thread_leak3.c
new file mode 100644
index 0000000..058b6e5
--- /dev/null
+++ b/lib/tsan/output_tests/thread_leak3.c
@@ -0,0 +1,14 @@
+#include <pthread.h>
+
+void *Thread(void *x) {
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: thread leak
+
diff --git a/lib/tsan/output_tests/vptr_benign_race.cc b/lib/tsan/output_tests/vptr_benign_race.cc
new file mode 100644
index 0000000..fec4ffb
--- /dev/null
+++ b/lib/tsan/output_tests/vptr_benign_race.cc
@@ -0,0 +1,50 @@
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdio.h>
+
+struct A {
+  A() {
+    sem_init(&sem_, 0, 0);
+  }
+  virtual void F() {
+  }
+  void Done() {
+    sem_post(&sem_);
+  }
+  virtual ~A() {
+  }
+  sem_t sem_;
+};
+
+struct B : A {
+  virtual void F() {
+  }
+  virtual ~B() {
+    sem_wait(&sem_);
+    sem_destroy(&sem_);
+  }
+};
+
+static A *obj = new B;
+
+void *Thread1(void *x) {
+  obj->F();
+  obj->Done();
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  delete obj;
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "PASS\n");
+}
+// CHECK: PASS
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/output_tests/vptr_harmful_race.cc b/lib/tsan/output_tests/vptr_harmful_race.cc
new file mode 100644
index 0000000..a19e6ab
--- /dev/null
+++ b/lib/tsan/output_tests/vptr_harmful_race.cc
@@ -0,0 +1,48 @@
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdio.h>
+
+struct A {
+  A() {
+    sem_init(&sem_, 0, 0);
+  }
+  virtual void F() {
+  }
+  void Done() {
+    sem_post(&sem_);
+  }
+  virtual ~A() {
+    sem_wait(&sem_);
+    sem_destroy(&sem_);
+  }
+  sem_t sem_;
+};
+
+struct B : A {
+  virtual void F() {
+  }
+  virtual ~B() { }
+};
+
+static A *obj = new B;
+
+void *Thread1(void *x) {
+  obj->F();
+  obj->Done();
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  delete obj;
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
diff --git a/lib/tsan/rtl/Makefile.old b/lib/tsan/rtl/Makefile.old
new file mode 100644
index 0000000..220e6fb
--- /dev/null
+++ b/lib/tsan/rtl/Makefile.old
@@ -0,0 +1,91 @@
+CXXFLAGS = -fPIE -g -Wall -Werror -fno-builtin -DTSAN_DEBUG=$(DEBUG)
+ifeq ($(DEBUG), 0)
+	CXXFLAGS += -O3
+endif
+
+# For interception. FIXME: move interception one level higher.
+INCLUDES= -I../../asan
+EXTRA_CXXFLAGS=-fno-exceptions
+NO_SYSROOT=--sysroot=.
+CXXFLAGS+=$(EXTRA_CXXFLAGS)
+ifeq ($(DEBUG), 0)
+  CXXFLAGS+=-fomit-frame-pointer
+endif
+ifeq ($(CXX), clang++)
+  # Global constructors are banned.
+  CXXFLAGS+=-Wglobal-constructors
+else
+  CXXFLAGS+=-Wframe-larger-than=512
+endif
+
+
+all: libtsan.a
+
+LIBTSAN_HEADERS=tsan_allocator.h \
+                tsan_atomic.h \
+                tsan_clock.h \
+                tsan_compiler.h \
+                tsan_defs.h \
+		tsan_interface.h \
+		tsan_interface_ann.h \
+		tsan_interface_inl.h \
+	        tsan_mman.h \
+                tsan_platform.h \
+                tsan_mutex.h \
+                tsan_report.h \
+		tsan_placement_new.h \
+                tsan_rtl.h \
+		tsan_suppressions.h \
+		tsan_symbolize.h \
+		tsan_sync.h \
+		tsan_trace.h \
+                tsan_vector.h
+
+LIBTSAN_OBJ=tsan_allocator.o \
+            tsan_clock.o \
+            tsan_flags.o \
+            tsan_mutex.o \
+            tsan_interceptors.o \
+            tsan_interface.o \
+            tsan_interface_ann.o \
+            tsan_interface_atomic.o \
+            tsan_md5.o \
+            tsan_mman.o \
+	    tsan_platform_linux.o \
+            tsan_report.o \
+            tsan_printf.o \
+            tsan_rtl.o \
+            tsan_rtl_amd64.o \
+            tsan_rtl_mutex.o \
+            tsan_rtl_report.o \
+            tsan_rtl_thread.o \
+            tsan_stat.o \
+            tsan_suppressions.o \
+            tsan_sync.o \
+            interception_linux.o
+
+ifneq ($(TSAN_SYMB_NULL), )
+  LIBTSAN_OBJ+=tsan_symbolize_null.o
+else ifneq ($(TSAN_SYMB_LLDB), )
+  LIBTSAN_OBJ+=tsan_symbolize_lldb_linux.o
+else
+  LIBTSAN_OBJ+=tsan_symbolize_addr2line_linux.o
+endif
+
+%_linux.o: %_linux.cc Makefile.old $(LIBTSAN_HEADERS)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $<
+
+%.o: %.cc Makefile.old $(LIBTSAN_HEADERS)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) $(NO_SYSROOT) -c $<
+
+%.o: ../../asan/interception/%.cc
+	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ 
+
+libtsan.a: $(LIBTSAN_OBJ)
+	ar ru $@ $(LIBTSAN_OBJ)
+
+libtsan_dummy.a: tsan_dummy_rtl.o
+	ar ru $@ $<
+
+clean:
+	rm -f *.o *.a
diff --git a/lib/tsan/rtl/tsan_allocator.cc b/lib/tsan/rtl/tsan_allocator.cc
new file mode 100644
index 0000000..4b21d1e
--- /dev/null
+++ b/lib/tsan/rtl/tsan_allocator.cc
@@ -0,0 +1,47 @@
+//===-- tsan_allocator.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_allocator.h"
+
+// Provisional implementation.
+extern "C" void *__libc_malloc(__tsan::uptr size);
+extern "C" void __libc_free(void *ptr);
+
+namespace __tsan {
+
+u64 kBlockMagic = 0x6A6CB03ABCEBC041ull;
+
+void AllocInit() {
+}
+
+void *Alloc(uptr sz) {
+  void *p = __libc_malloc(sz + sizeof(u64));  // Room for a u64 header.
+  ((u64*)p)[0] = kBlockMagic;  // NOTE(review): p is not checked for NULL.
+  return (char*)p + sizeof(u64);  // User memory starts after the header.
+}
+
+void Free(void *p) {
+  CHECK_NE(p, (char*)0);
+  p = (char*)p - sizeof(u64);
+  CHECK_EQ(((u64*)p)[0], kBlockMagic);
+  ((u64*)p)[0] = 0;
+  __libc_free(p);
+}
+
+void *AllocBlock(void *p) {
+  CHECK_NE(p, (void*)0);
+  u64 *pp = (u64*)((uptr)p & ~0x7);  // Round down to an 8-byte boundary.
+  for (; pp[0] != kBlockMagic; pp--) {}  // Scan back to the magic header.
+  return pp + 1;  // The block begins one word past the header.
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_allocator.h b/lib/tsan/rtl/tsan_allocator.h
new file mode 100644
index 0000000..7018bce
--- /dev/null
+++ b/lib/tsan/rtl/tsan_allocator.h
@@ -0,0 +1,29 @@
+//===-- tsan_allocator.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_ALLOCATOR_H
+#define TSAN_ALLOCATOR_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+void AllocInit();
+void *Alloc(uptr sz);
+void Free(void *p);  // Does not accept NULL.
+// Given the pointer p into a valid allocated block,
+// returns a pointer to the beginning of the block.
+void *AllocBlock(void *p);
+
+}  // namespace __tsan
+
+#endif  // TSAN_ALLOCATOR_H
diff --git a/lib/tsan/rtl/tsan_atomic.h b/lib/tsan/rtl/tsan_atomic.h
new file mode 100644
index 0000000..6fcd9f9
--- /dev/null
+++ b/lib/tsan/rtl/tsan_atomic.h
@@ -0,0 +1,140 @@
+//===-- tsan_atomic.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Atomic operations. For now implies IA-32/Intel64.
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_ATOMIC_H
+#define TSAN_ATOMIC_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+const int kCacheLineSize = 64;
+
+enum memory_order {
+  memory_order_relaxed = 1 << 0,
+  memory_order_consume = 1 << 1,
+  memory_order_acquire = 1 << 2,
+  memory_order_release = 1 << 3,
+  memory_order_acq_rel = 1 << 4,
+  memory_order_seq_cst = 1 << 5,
+};
+
+struct atomic_uint32_t {
+  typedef u32 Type;
+  volatile Type val_dont_use;
+};
+
+struct atomic_uint64_t {
+  typedef u64 Type;
+  volatile Type val_dont_use;
+};
+
+struct atomic_uintptr_t {
+  typedef uptr Type;
+  volatile Type val_dont_use;
+};
+
+INLINE void atomic_signal_fence(memory_order) {
+  __asm__ __volatile__("" ::: "memory");
+}
+
+INLINE void atomic_thread_fence(memory_order) {
+  __asm__ __volatile__("mfence" ::: "memory");
+}
+
+INLINE void proc_yield(int cnt) {
+  __asm__ __volatile__("" ::: "memory");
+  for (int i = 0; i < cnt; i++)
+    __asm__ __volatile__("pause");
+  __asm__ __volatile__("" ::: "memory");
+}
+
+template<typename T>
+INLINE typename T::Type atomic_load(
+    const volatile T *a, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_consume
+      | memory_order_acquire | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+  typename T::Type v;
+  if (mo == memory_order_relaxed) {
+    v = a->val_dont_use;
+  } else {
+    atomic_signal_fence(memory_order_seq_cst);
+    v = a->val_dont_use;
+    atomic_signal_fence(memory_order_seq_cst);
+  }
+  return v;
+}
+
+template<typename T>
+INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_release
+      | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+  if (mo == memory_order_relaxed) {
+    a->val_dont_use = v;
+  } else {
+    atomic_signal_fence(memory_order_seq_cst);
+    a->val_dont_use = v;
+    atomic_signal_fence(memory_order_seq_cst);
+  }
+  if (mo == memory_order_seq_cst)
+    atomic_thread_fence(memory_order_seq_cst);
+}
+
+template<typename T>
+INLINE typename T::Type atomic_fetch_add(volatile T *a,
+    typename T::Type v, memory_order mo) {
+  (void)mo;
+  DCHECK(!((uptr)a % sizeof(*a)));
+  return __sync_fetch_and_add(&a->val_dont_use, v);
+}
+
+template<typename T>
+INLINE typename T::Type atomic_fetch_sub(volatile T *a,
+    typename T::Type v, memory_order mo) {
+  (void)mo;  // Unused: the __sync builtin below is a full barrier.
+  DCHECK(!((uptr)a % sizeof(*a)));  // The object must be naturally aligned.
+  return __sync_fetch_and_sub(&a->val_dont_use, v);  // Returns the old value.
+}
+
+INLINE uptr atomic_exchange(volatile atomic_uintptr_t *a, uptr v,
+                            memory_order mo) {
+  __asm__ __volatile__("xchg %1, %0" : "+r"(v), "+m"(*a) : : "memory", "cc");
+  return v;
+}
+
+template<typename T>
+INLINE bool atomic_compare_exchange_strong(volatile T *a,
+                                           typename T::Type *cmp,
+                                           typename T::Type xchg,
+                                           memory_order mo) {
+  typedef typename T::Type Type;
+  Type cmpv = *cmp;
+  Type prev = __sync_val_compare_and_swap(&a->val_dont_use, cmpv, xchg);
+  if (prev == cmpv)
+    return true;
+  *cmp = prev;
+  return false;
+}
+
+INLINE bool atomic_compare_exchange_weak(volatile atomic_uintptr_t *a,
+                                         uptr *cmp, uptr xchg,
+                                         memory_order mo) {
+  return atomic_compare_exchange_strong(a, cmp, xchg, mo);
+}
+
+}  // namespace __tsan
+
+#endif  // TSAN_ATOMIC_H
diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
new file mode 100644
index 0000000..2b01632
--- /dev/null
+++ b/lib/tsan/rtl/tsan_clock.cc
@@ -0,0 +1,99 @@
+//===-- tsan_clock.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_clock.h"
+#include "tsan_rtl.h"
+
+// It's possible to optimize clock operations for some important cases
+// so that they are O(1). The cases include singletons, once's, local mutexes.
+// First, SyncClock must be re-implemented to allow indexing by tid.
+// It must not necessarily be a full vector clock, though. For example it may
+// be a multi-level table.
+// Then, each slot in SyncClock must contain a dirty bit (it's united with
+// the clock value, so no space increase). The acquire algorithm looks
+// as follows:
+// void acquire(thr, tid, thr_clock, sync_clock) {
+//   if (!sync_clock[tid].dirty)
+//     return;  // No new info to acquire.
+//              // This handles constant reads of singleton pointers and
+//              // stop-flags.
+//   acquire_impl(thr_clock, sync_clock);  // As usual, O(N).
+//   sync_clock[tid].dirty = false;
+//   sync_clock.dirty_count--;
+// }
+// The release operation looks as follows:
+// void release(thr, tid, thr_clock, sync_clock) {
+//   // thr->sync_cache is a simple fixed-size hash-based cache that holds
+//   // several previous sync_clock's.
+//   if (thr->sync_cache[sync_clock] >= thr->last_acquire_epoch) {
+//     // The thread did no acquire operations since last release on this clock.
+//     // So update only the thread's slot (other slots can't possibly change).
+//     sync_clock[tid].clock = thr->epoch;
+//     if (sync_clock.dirty_count == sync_clock.cnt
+//         || (sync_clock.dirty_count == sync_clock.cnt - 1
+//           && sync_clock[tid].dirty == false))
+//       // All dirty flags are set, bail out.
+//       return;
+//     set all dirty bits, but preserve the thread's bit.  // O(N)
+//     update sync_clock.dirty_count;
+//     return;
+//   }
+//   release_impl(thr_clock, sync_clock);  // As usual, O(N).
+//   set all dirty bits, but preserve the thread's bit.
+//   // The previous step is combined with release_impl(), so that
+//   // we scan the arrays only once.
+//   update sync_clock.dirty_count;
+// }
+
+namespace __tsan {
+
+ThreadClock::ThreadClock() {
+  nclk_ = 0;
+  for (uptr i = 0; i < (uptr)kMaxTid; i++)
+    clk_[i] = 0;
+}
+
+void ThreadClock::acquire(const SyncClock *src) {
+  DCHECK(nclk_ <= kMaxTid);
+  DCHECK(src->clk_.Size() <= kMaxTid);
+
+  const uptr nclk = src->clk_.Size();
+  if (nclk == 0)
+    return;
+  nclk_ = max(nclk_, nclk);
+  for (uptr i = 0; i < nclk; i++) {
+    if (clk_[i] < src->clk_[i])
+      clk_[i] = src->clk_[i];
+  }
+}
+
+void ThreadClock::release(SyncClock *dst) const {
+  DCHECK(nclk_ <= kMaxTid);
+  DCHECK(dst->clk_.Size() <= kMaxTid);
+
+  if (dst->clk_.Size() < nclk_)
+    dst->clk_.Resize(nclk_);
+  for (uptr i = 0; i < nclk_; i++) {
+    if (dst->clk_[i] < clk_[i])
+      dst->clk_[i] = clk_[i];
+  }
+}
+
+void ThreadClock::acq_rel(SyncClock *dst) {
+  acquire(dst);
+  release(dst);
+}
+
+SyncClock::SyncClock()
+  : clk_(MBlockClock) {
+}
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_clock.h b/lib/tsan/rtl/tsan_clock.h
new file mode 100644
index 0000000..c4f8194
--- /dev/null
+++ b/lib/tsan/rtl/tsan_clock.h
@@ -0,0 +1,79 @@
+//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_CLOCK_H
+#define TSAN_CLOCK_H
+
+#include "tsan_defs.h"
+#include "tsan_vector.h"
+
+namespace __tsan {
+
+// The clock that lives in sync variables (mutexes, atomics, etc).
+class SyncClock {
+ public:
+  SyncClock();
+
+  uptr size() const {
+    return clk_.Size();
+  }
+
+  void Reset() {
+    clk_.Reset();
+  }
+
+ private:
+  Vector<u64> clk_;
+  friend struct ThreadClock;
+};
+
+// The clock that lives in threads.
+struct ThreadClock {
+ public:
+  ThreadClock();
+
+  u64 get(int tid) const {
+    DCHECK(tid < kMaxTid);
+    return clk_[tid];
+  }
+
+  void set(int tid, u64 v) {
+    DCHECK(tid < kMaxTid);
+    DCHECK(v >= clk_[tid]);
+    clk_[tid] = v;
+    if ((int)nclk_ <= tid)
+      nclk_ = tid + 1;
+  }
+
+  void tick(int tid) {
+    DCHECK(tid < kMaxTid);
+    clk_[tid]++;
+    if ((int)nclk_ <= tid)
+      nclk_ = tid + 1;
+  }
+
+  uptr size() const {
+    return nclk_;
+  }
+
+  void acquire(const SyncClock *src);
+  void release(SyncClock *dst) const;
+  void acq_rel(SyncClock *dst);
+
+ private:
+  uptr nclk_;
+  u64 clk_[kMaxTid];
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_CLOCK_H
diff --git a/lib/tsan/rtl/tsan_compiler.h b/lib/tsan/rtl/tsan_compiler.h
new file mode 100644
index 0000000..6aab097
--- /dev/null
+++ b/lib/tsan/rtl/tsan_compiler.h
@@ -0,0 +1,30 @@
+//===-- tsan_compiler.h -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Compiler-specific definitions.
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_COMPILER_H
+#define TSAN_COMPILER_H
+
+#define INLINE        static inline
+#define NOINLINE      __attribute__((noinline))
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NORETURN      __attribute__((noreturn))
+#define WEAK          __attribute__((weak))
+#define ALIGN(n)      __attribute__((aligned(n)))
+#define LIKELY(x)     __builtin_expect(!!(x), 1)
+#define UNLIKELY(x)   __builtin_expect(!!(x), 0)
+#define THREADLOCAL   __thread
+#define FORMAT(f, a)  __attribute__((format(printf, f, a)))
+#define USED          __attribute__((used))
+
+#endif  // TSAN_COMPILER_H
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
new file mode 100644
index 0000000..d2088d5
--- /dev/null
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -0,0 +1,194 @@
+//===-- tsan_defs.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_DEFS_H
+#define TSAN_DEFS_H
+
+#include "tsan_compiler.h"
+#include "tsan_stat.h"
+
+#ifndef TSAN_DEBUG
+#define TSAN_DEBUG 0
+#endif  // TSAN_DEBUG
+
+namespace __tsan {
+
+typedef unsigned u32;  // NOLINT
+typedef unsigned long long u64;  // NOLINT
+typedef   signed long long s64;  // NOLINT
+typedef unsigned long uptr;  // NOLINT
+
+const uptr kPageSize = 4096;
+const int kTidBits = 16;
+const int kMaxTid = 1 << kTidBits;
+const int kClkBits = 40;
+
+#ifdef TSAN_SHADOW_COUNT
+# if TSAN_SHADOW_COUNT == 2 \
+  || TSAN_SHADOW_COUNT == 4 || TSAN_SHADOW_COUNT == 8
+const unsigned kShadowCnt = TSAN_SHADOW_COUNT;
+# else
+#   error "TSAN_SHADOW_COUNT must be one of 2,4,8"
+# endif
+#else
+// Count of shadow values in a shadow cell.
+const unsigned kShadowCnt = 8;
+#endif
+
+// That many user bytes are mapped onto a single shadow cell.
+const unsigned kShadowCell = 8;
+
+// Size of a single shadow value (u64).
+const unsigned kShadowSize = 8;
+
+#if defined(TSAN_COLLECT_STATS) && TSAN_COLLECT_STATS
+const bool kCollectStats = true;
+#else
+const bool kCollectStats = false;
+#endif
+
+#define CHECK_IMPL(c1, op, c2) \
+  do { \
+    __tsan::u64 v1 = (__tsan::u64)(c1); \
+    __tsan::u64 v2 = (__tsan::u64)(c2); \
+    if (!(v1 op v2)) \
+      __tsan::CheckFailed(__FILE__, __LINE__, \
+        "(" #c1 ") " #op " (" #c2 ")", v1, v2); \
+  } while (false) \
+/* Every name is __tsan::-qualified so the macro expands in any namespace. */
+
+#define CHECK(a)       CHECK_IMPL((a), !=, 0)
+#define CHECK_EQ(a, b) CHECK_IMPL((a), ==, (b))
+#define CHECK_NE(a, b) CHECK_IMPL((a), !=, (b))
+#define CHECK_LT(a, b) CHECK_IMPL((a), <,  (b))
+#define CHECK_LE(a, b) CHECK_IMPL((a), <=, (b))
+#define CHECK_GT(a, b) CHECK_IMPL((a), >,  (b))
+#define CHECK_GE(a, b) CHECK_IMPL((a), >=, (b))
+
+#if TSAN_DEBUG
+#define DCHECK(a)       CHECK(a)
+#define DCHECK_EQ(a, b) CHECK_EQ(a, b)
+#define DCHECK_NE(a, b) CHECK_NE(a, b)
+#define DCHECK_LT(a, b) CHECK_LT(a, b)
+#define DCHECK_LE(a, b) CHECK_LE(a, b)
+#define DCHECK_GT(a, b) CHECK_GT(a, b)
+#define DCHECK_GE(a, b) CHECK_GE(a, b)
+#else
+#define DCHECK(a)
+#define DCHECK_EQ(a, b)
+#define DCHECK_NE(a, b)
+#define DCHECK_LT(a, b)
+#define DCHECK_LE(a, b)
+#define DCHECK_GT(a, b)
+#define DCHECK_GE(a, b)
+#endif
+
+void CheckFailed(const char *file, int line, const char *cond, u64 v1, u64 v2);
+
+// The following "build consistency" machinery ensures that all source files
+// are built in the same configuration. Inconsistent builds lead to
+// hard to debug crashes.
+#if TSAN_DEBUG
+void build_consistency_debug();
+#else
+void build_consistency_release();
+#endif
+
+#if TSAN_COLLECT_STATS
+void build_consistency_stats();
+#else
+void build_consistency_nostats();
+#endif
+
+#if TSAN_SHADOW_COUNT == 1
+void build_consistency_shadow1();
+#elif TSAN_SHADOW_COUNT == 2
+void build_consistency_shadow2();
+#elif TSAN_SHADOW_COUNT == 4
+void build_consistency_shadow4();
+#else
+void build_consistency_shadow8();
+#endif
+
+static inline void USED build_consistency() {
+#if TSAN_DEBUG
+  void(*volatile cfg)() = &build_consistency_debug;
+#else
+  void(*volatile cfg)() = &build_consistency_release;
+#endif
+#if TSAN_COLLECT_STATS
+  void(*volatile stats)() = &build_consistency_stats;
+#else
+  void(*volatile stats)() = &build_consistency_nostats;
+#endif
+#if TSAN_SHADOW_COUNT == 1
+  void(*volatile shadow)() = &build_consistency_shadow1;
+#elif TSAN_SHADOW_COUNT == 2
+  void(*volatile shadow)() = &build_consistency_shadow2;
+#elif TSAN_SHADOW_COUNT == 4
+  void(*volatile shadow)() = &build_consistency_shadow4;
+#else
+  void(*volatile shadow)() = &build_consistency_shadow8;
+#endif
+  (void)cfg;
+  (void)stats;
+  (void)shadow;
+}
+
+template<typename T>
+T min(T a, T b) {
+  return a < b ? a : b;
+}
+
+template<typename T>
+T max(T a, T b) {
+  return a > b ? a : b;
+}
+
+template<typename T>
+T RoundUp(T p, int align) {
+  DCHECK_EQ(align & (align - 1), 0);
+  return (T)(((u64)p + align - 1) & ~(align - 1));
+}
+
+void internal_memset(void *ptr, int c, uptr size);
+void internal_memcpy(void *dst, const void *src, uptr size);
+int internal_memcmp(const void *s1, const void *s2, uptr size);
+int internal_strcmp(const char *s1, const char *s2);
+int internal_strncmp(const char *s1, const char *s2, uptr size);
+void internal_strcpy(char *s1, const char *s2);
+uptr internal_strlen(const char *s);
+char* internal_strdup(const char *s);
+const char *internal_strstr(const char *where, const char *what);
+const char *internal_strchr(const char *where, char what);
+
+struct MD5Hash {
+  u64 hash[2];
+  bool operator==(const MD5Hash &other) const {
+    return hash[0] == other.hash[0] && hash[1] == other.hash[1];
+  }
+};
+
+MD5Hash md5_hash(const void *data, uptr size);
+
+struct ThreadState;
+struct ThreadContext;
+struct Context;
+struct ReportStack;
+class ReportDesc;
+class RegionAlloc;
+class StackTrace;
+
+}  // namespace __tsan
+
+#endif  // TSAN_DEFS_H
diff --git a/lib/tsan/rtl/tsan_flags.cc b/lib/tsan/rtl/tsan_flags.cc
new file mode 100644
index 0000000..77abd5b
--- /dev/null
+++ b/lib/tsan/rtl/tsan_flags.cc
@@ -0,0 +1,143 @@
+//===-- tsan_flags.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_flags.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+
+namespace __tsan {
+
+static void Flag(const char *env, bool *flag, const char *name);
+static void Flag(const char *env, int *flag, const char *name);
+static void Flag(const char *env, const char **flag, const char *name);
+
+Flags *flags() {
+  return &CTX()->flags;
+}
+
+// Can be overridden in frontend.
+void WEAK OverrideFlags(Flags *f) {
+  (void)f;
+}
+
+void InitializeFlags(Flags *f, const char *env) {
+  internal_memset(f, 0, sizeof(*f));
+
+  // Default values.
+  f->enable_annotations = true;
+  f->suppress_equal_stacks = true;
+  f->suppress_equal_addresses = true;
+  f->report_thread_leaks = true;
+  f->report_signal_unsafe = true;
+  f->force_seq_cst_atomics = false;
+  f->strip_path_prefix = internal_strdup("");
+  f->suppressions = internal_strdup("");
+  f->exitcode = 66;
+  f->log_fileno = 2;
+  f->atexit_sleep_ms = 1000;
+  f->verbosity = 0;
+
+  // Let a frontend override.
+  OverrideFlags(f);
+
+  // Override from command line.
+  Flag(env, &f->enable_annotations, "enable_annotations");
+  Flag(env, &f->suppress_equal_stacks, "suppress_equal_stacks");
+  Flag(env, &f->suppress_equal_addresses, "suppress_equal_addresses");
+  Flag(env, &f->report_thread_leaks, "report_thread_leaks");
+  Flag(env, &f->report_signal_unsafe, "report_signal_unsafe");
+  Flag(env, &f->force_seq_cst_atomics, "force_seq_cst_atomics");
+  Flag(env, &f->strip_path_prefix, "strip_path_prefix");
+  Flag(env, &f->suppressions, "suppressions");
+  Flag(env, &f->exitcode, "exitcode");
+  Flag(env, &f->log_fileno, "log_fileno");
+  Flag(env, &f->atexit_sleep_ms, "atexit_sleep_ms");
+  Flag(env, &f->verbosity, "verbosity");
+}
+
+void FinalizeFlags(Flags *flags) {
+  internal_free((void*)flags->strip_path_prefix);
+  internal_free((void*)flags->suppressions);
+}
+
+static const char *GetFlagValue(const char *env, const char *name,
+                                const char **end) {
+  if (env == 0)
+    return *end = 0;  // No flags string at all.
+  const char *pos = internal_strstr(env, name);  // Presumably first match.
+  if (pos == 0)
+    return *end = 0;  // The flag is not mentioned.
+  pos += internal_strlen(name);  // NOTE(review): match may be mid-token.
+  if (pos[0] != '=')
+    return *end = pos;  // Name without '=': empty value (result == *end).
+  pos += 1;
+  if (pos[0] == '"') {  // Double-quoted value.
+    pos += 1;
+    *end = internal_strchr(pos, '"');
+  } else if (pos[0] == '\'') {  // Single-quoted value.
+    pos += 1;
+    *end = internal_strchr(pos, '\'');
+  } else {  // Bare value: runs up to the next space.
+    *end = internal_strchr(pos, ' ');
+  }
+  if (*end == 0)  // No terminator found: the value runs to the end of env.
+    *end = pos + internal_strlen(pos);
+  return pos;
+}
+
+static void Flag(const char *env, bool *flag, const char *name) {
+  const char *end = 0;
+  const char *val = GetFlagValue(env, name, &end);
+  if (val == 0)
+    return;
+  int len = end - val;
+  if (len == 1 && val[0] == '0')
+    *flag = false;
+  else if (len == 1 && val[0] == '1')
+    *flag = true;
+}
+
+static void Flag(const char *env, int *flag, const char *name) {
+  const char *end = 0;
+  const char *val = GetFlagValue(env, name, &end);
+  if (val == 0)
+    return;
+  bool minus = false;
+  if (val != end && val[0] == '-') {
+    minus = true;
+    val += 1;
+  }
+  int v = 0;
+  for (; val != end; val++) {
+    if (val[0] < '0' || val[0] > '9')
+      break;
+    v = v * 10 + val[0] - '0';
+  }
+  if (minus)
+    v = -v;
+  *flag = v;
+}
+
+static void Flag(const char *env, const char **flag, const char *name) {
+  const char *end = 0;
+  const char *val = GetFlagValue(env, name, &end);
+  if (val == 0)  // The flag is absent from env: keep the current string.
+    return;
+  char *f = (char*)internal_alloc(MBlockFlag, end - val + 1);  // Value + NUL.
+  internal_memcpy(f, val, end - val);
+  f[end - val] = 0;
+  internal_free((void*)*flag);  // Release the string being replaced (no leak).
+  *flag = f;
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_flags.h b/lib/tsan/rtl/tsan_flags.h
new file mode 100644
index 0000000..1bad405
--- /dev/null
+++ b/lib/tsan/rtl/tsan_flags.h
@@ -0,0 +1,56 @@
+//===-- tsan_flags.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_FLAGS_H
+#define TSAN_FLAGS_H
+
+namespace __tsan {
+
+struct Flags {
+  // Enable dynamic annotations, otherwise they are no-ops.
+  bool enable_annotations;
+  // Suppress a race report if we've already output another race report
+  // with the same stack.
+  bool suppress_equal_stacks;
+  // Suppress a race report if we've already output another race report
+  // on the same address.
+  bool suppress_equal_addresses;
+  // Report thread leaks at exit?
+  bool report_thread_leaks;
+  // Report violations of async signal-safety
+  // (e.g. malloc() call from a signal handler).
+  bool report_signal_unsafe;
+  // If set, all atomics are effectively sequentially consistent (seq_cst),
+  // regardless of what user actually specified.
+  bool force_seq_cst_atomics;
+  // Strip that prefix from file paths in reports.
+  const char *strip_path_prefix;
+  // Suppressions filename.
+  const char *suppressions;
+  // Override exit status if something was reported.
+  int exitcode;
+  // Log fileno (1 - stdout, 2 - stderr).
+  int log_fileno;
+  // Sleep in main thread before exiting for that many ms
+  // (useful to catch "at exit" races).
+  int atexit_sleep_ms;
+  // Verbosity level (0 - silent, 1 - a bit of output, 2+ - more output).
+  int verbosity;
+};
+
+Flags *flags();
+void InitializeFlags(Flags *flags, const char *env);
+void FinalizeFlags(Flags *flags);
+}
+
+#endif  // TSAN_FLAGS_H
diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc
new file mode 100644
index 0000000..3d22181
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interceptors.cc
@@ -0,0 +1,1402 @@
+//===-- tsan_interceptors.cc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "interception/interception.h"
+#include "tsan_rtl.h"
+#include "tsan_interface.h"
+#include "tsan_atomic.h"
+#include "tsan_platform.h"
+#include "tsan_mman.h"
+#include "tsan_placement_new.h"
+
+using namespace __tsan;  // NOLINT
+
+struct sigset_t {
+  u64 val[1024 / 8 / sizeof(u64)];  // 1024 bits: the size glibc reads/writes.
+};
+
+struct ucontext_t {
+  u64 opaque[1024];
+};
+
+extern "C" int pthread_attr_init(void *attr);
+extern "C" int pthread_attr_destroy(void *attr);
+extern "C" int pthread_attr_getdetachstate(void *attr, int *v);
+extern "C" int pthread_attr_setstacksize(void *attr, uptr stacksize);
+extern "C" int pthread_attr_getstacksize(void *attr, uptr *stacksize);
+extern "C" int pthread_key_create(unsigned *key, void (*destructor)(void* v));
+extern "C" int pthread_setspecific(unsigned key, const void *v);
+extern "C" int pthread_mutexattr_gettype(void *a, int *type);
+extern "C" int pthread_yield();
+extern "C" int pthread_sigmask(int how, const sigset_t *set, sigset_t *oldset);
+extern "C" int sigfillset(sigset_t *set);
+extern "C" void *pthread_self();
+extern "C" int getcontext(ucontext_t *ucp);
+extern "C" void _exit(int status);
+extern "C" int __cxa_atexit(void (*func)(void *arg), void *arg, void *dso);
+extern "C" int *__errno_location();
+extern "C" int usleep(unsigned usec);
+const int PTHREAD_MUTEX_RECURSIVE = 1;
+const int PTHREAD_MUTEX_RECURSIVE_NP = 1;
+const int kPthreadAttrSize = 56;
+const int EINVAL = 22;
+const int EBUSY = 16;
+const int EPOLL_CTL_ADD = 1;
+void *const MAP_FAILED = (void*)-1;
+const int PTHREAD_BARRIER_SERIAL_THREAD = -1;
+const int MAP_FIXED = 0x10;
+typedef long long_t;  // NOLINT
+
+typedef void (*sighandler_t)(int sig);
+
+union pthread_attr_t {
+  char size[kPthreadAttrSize];
+  void *align;
+};
+
+struct sigaction_t {
+  union {
+    sighandler_t sa_handler;
+    void (*sa_sigaction)(int sig, my_siginfo_t *siginfo, void *uctx);
+  };
+  sigset_t sa_mask;
+  int sa_flags;
+  void (*sa_restorer)();
+};
+
+const sighandler_t SIG_DFL = (sighandler_t)0;
+const sighandler_t SIG_IGN = (sighandler_t)1;
+const int SA_SIGINFO = 4;
+const int SIG_SETMASK = 2;
+
+static sigaction_t sigactions[kSigCount];
+
+static unsigned g_thread_finalize_key;
+
+static void process_pending_signals(ThreadState *thr);
+
+class ScopedInterceptor {
+ public:
+  ScopedInterceptor(ThreadState *thr, const char *fname, uptr pc)
+      : thr_(thr)
+      , in_rtl_(thr->in_rtl) {
+    if (thr_->in_rtl == 0) {  // Not inside the runtime yet.
+      Initialize(thr);
+      FuncEntry(thr, pc);
+      thr_->in_rtl++;
+      DPrintf("#%d: intercept %s()\n", thr_->tid, fname);
+    } else {
+      thr_->in_rtl++;  // Nested entry: only bump the depth.
+    }
+  }
+  // Restores in_rtl; at depth 0 also calls FuncExit and processes signals.
+  ~ScopedInterceptor() {
+    thr_->in_rtl--;
+    if (thr_->in_rtl == 0) {
+      FuncExit(thr_);
+      process_pending_signals(thr_);
+    }
+    CHECK_EQ(in_rtl_, thr_->in_rtl);
+  }
+
+ private:
+  ThreadState *const thr_;
+  const int in_rtl_;  // Value of thr->in_rtl when the scope was entered.
+};
+
+#define SCOPED_INTERCEPTOR_RAW(func, ...) \
+    ThreadState *thr = cur_thread(); \
+    StatInc(thr, StatInterceptor); \
+    StatInc(thr, StatInt_##func); \
+    ScopedInterceptor si(thr, #func, \
+        (__tsan::uptr)__builtin_return_address(0)); \
+    const uptr pc = (uptr)&func; \
+    (void)pc; \
+/**/
+
+#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
+    SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
+    if (thr->in_rtl > 1) \
+      return REAL(func)(__VA_ARGS__); \
+/**/
+
+#define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__)
+#define TSAN_INTERCEPT(func) \
+    if (!INTERCEPT_FUNCTION(func)) \
+      Printf("ThreadSanitizer: failed to intercept '" #func "' function\n"); \
+/**/
+
+class AtExitContext {
+ public:
+  AtExitContext()
+    : mtx_(MutexTypeAtExit, StatMtxAtExit)
+    , pos_() {
+  }
+
+  typedef void(*atexit_t)();
+  // Pushes f onto the stack. Returns 1 if the stack is full, 0 on success.
+  int atexit(ThreadState *thr, uptr pc, atexit_t f) {
+    Lock l(&mtx_);
+    if (pos_ == kMaxAtExit)
+      return 1;
+    Release(thr, pc, (uptr)this);
+    stack_[pos_] = f;
+    pos_++;
+    return 0;
+  }
+  // Pops and calls the registered functions, last registered first.
+  void exit(ThreadState *thr, uptr pc) {
+    CHECK_EQ(thr->in_rtl, 0);
+    for (;;) {
+      atexit_t f = 0;
+      {
+        Lock l(&mtx_);  // Held only while popping, not while f runs.
+        if (pos_) {
+          pos_--;
+          f = stack_[pos_];
+          ScopedInRtl in_rtl;
+          Acquire(thr, pc, (uptr)this);
+        }
+      }
+      if (f == 0)
+        break;
+      DPrintf("#%d: executing atexit func %p\n", thr->tid, f);
+      CHECK_EQ(thr->in_rtl, 0);
+      f();
+    }
+  }
+
+ private:
+  static const int kMaxAtExit = 128;
+  Mutex mtx_;
+  atexit_t stack_[kMaxAtExit];
+  int pos_;  // Number of entries currently stored in stack_.
+};
+
+static AtExitContext *atexit_ctx;
+
+static void finalize(void *arg) {
+  ThreadState *const self = cur_thread();
+  // Run the queued atexit callbacks first; no caller pc is known, so pass 0.
+  atexit_ctx->exit(self, 0);
+  {
+    ScopedInRtl in_rtl;
+    DestroyAndFree(atexit_ctx);
+    usleep(flags()->atexit_sleep_ms * 1000);
+  }
+  // Terminate via _exit() with the status computed by Finalize().
+  _exit(Finalize(cur_thread()));
+}
+
+TSAN_INTERCEPTOR(int, atexit, void (*f)()) {
+  SCOPED_TSAN_INTERCEPTOR(atexit, f);
+  // Queue f on the runtime's own stack; finalize() runs it at exit.
+  return atexit_ctx->atexit(thr, pc, f);
+}
+
+static uptr fd2addr(int fd) {
+  (void)fd;  // Unused: every descriptor shares one sync address.
+  static u64 addr;
+  return (uptr)&addr;
+}
+
+static uptr epollfd2addr(int fd) {
+  (void)fd;  // Unused: every epoll descriptor shares one sync address.
+  static u64 addr;
+  return (uptr)&addr;
+}
+
+static uptr file2addr(char *path) {
+  (void)path;  // Unused: every file path shares one sync address.
+  static u64 addr;
+  return (uptr)&addr;
+}
+
+static uptr dir2addr(char *path) {
+  (void)path;  // Unused: every directory path shares one sync address.
+  static u64 addr;
+  return (uptr)&addr;
+}
+
+TSAN_INTERCEPTOR(void*, malloc, uptr size) {
+  SCOPED_INTERCEPTOR_RAW(malloc, size);
+  return user_alloc(thr, pc, size);  // Uses user_alloc(), not REAL(malloc).
+}
+
+TSAN_INTERCEPTOR(void*, calloc, uptr size, uptr n) {
+  SCOPED_INTERCEPTOR_RAW(calloc, size, n);
+  void *p = (size && n > (uptr)-1 / size) ? 0 : user_alloc(thr, pc, n * size);
+  if (p) internal_memset(p, 0, n * size);  // p is 0 when n * size overflows.
+  return p;
+}
+
+TSAN_INTERCEPTOR(void*, realloc, void *p, uptr size) {
+  SCOPED_INTERCEPTOR_RAW(realloc, p, size);
+  return user_realloc(thr, pc, p, size);  // Never forwards to REAL(realloc).
+}
+
+TSAN_INTERCEPTOR(void, free, void *p) {
+  if (p == 0)  // free(0) is a no-op; skip the scope and the stats.
+    return;
+  SCOPED_INTERCEPTOR_RAW(free, p);
+  user_free(thr, pc, p);
+}
+
+TSAN_INTERCEPTOR(void, cfree, void *p) {
+  if (p == 0)  // Null is a no-op; skip the scope and the stats.
+    return;
+  SCOPED_INTERCEPTOR_RAW(cfree, p);
+  user_free(thr, pc, p);
+}
+
+TSAN_INTERCEPTOR(uptr, strlen, const void *s) {
+  SCOPED_TSAN_INTERCEPTOR(strlen, s);
+  uptr len = REAL(strlen)(s);
+  MemoryAccessRange(thr, pc, (uptr)s, len + 1, false);  // +1 covers the NUL.
+  return len;
+}
+
+TSAN_INTERCEPTOR(void*, memset, void *dst, int v, uptr size) {
+  SCOPED_TSAN_INTERCEPTOR(memset, dst, v, size);
+  MemoryAccessRange(thr, pc, (uptr)dst, size, true);  // dst is written.
+  return REAL(memset)(dst, v, size);
+}
+
+TSAN_INTERCEPTOR(void*, memcpy, void *dst, const void *src, uptr size) {
+  SCOPED_TSAN_INTERCEPTOR(memcpy, dst, src, size);
+  MemoryAccessRange(thr, pc, (uptr)dst, size, true);  // dst is written.
+  MemoryAccessRange(thr, pc, (uptr)src, size, false);  // src is read.
+  return REAL(memcpy)(dst, src, size);
+}
+
+TSAN_INTERCEPTOR(int, strcmp, const char *s1, const char *s2) {
+  SCOPED_TSAN_INTERCEPTOR(strcmp, s1, s2);
+  // Find the first differing byte (or the shared terminator).
+  uptr len = 0;
+  while (s1[len] && s1[len] == s2[len])
+    len++;
+  MemoryAccessRange(thr, pc, (uptr)s1, len + 1, false);
+  MemoryAccessRange(thr, pc, (uptr)s2, len + 1, false);
+  // The C standard compares the bytes as unsigned char, not plain char.
+  return (unsigned char)s1[len] - (unsigned char)s2[len];
+}
+
+TSAN_INTERCEPTOR(int, strncmp, const char *s1, const char *s2, uptr n) {
+  SCOPED_TSAN_INTERCEPTOR(strncmp, s1, s2, n);
+  // Test len < n first so that the byte at index n is never read.
+  uptr len = 0;
+  while (len < n && s1[len] && s1[len] == s2[len])
+    len++;
+  MemoryAccessRange(thr, pc, (uptr)s1, len < n ? len + 1 : n, false);
+  MemoryAccessRange(thr, pc, (uptr)s2, len < n ? len + 1 : n, false);
+  // Compare as unsigned char, as the C standard requires.
+  return len == n ? 0 : (unsigned char)s1[len] - (unsigned char)s2[len];
+}
+
+TSAN_INTERCEPTOR(void*, memchr, void *s, int c, uptr n) {
+  SCOPED_TSAN_INTERCEPTOR(memchr, s, c, n);
+  void *res = REAL(memchr)(s, c, n);
+  uptr len = res ? (char*)res - (char*)s + 1 : n;  // Through the hit, else n.
+  MemoryAccessRange(thr, pc, (uptr)s, len, false);
+  return res;
+}
+
+TSAN_INTERCEPTOR(void*, memrchr, char *s, int c, uptr n) {
+  SCOPED_TSAN_INTERCEPTOR(memrchr, s, c, n);
+  MemoryAccessRange(thr, pc, (uptr)s, n, false);  // All n bytes, hit or not.
+  return REAL(memrchr)(s, c, n);
+}
+
+TSAN_INTERCEPTOR(void*, memmove, void *dst, void *src, uptr n) {
+  SCOPED_TSAN_INTERCEPTOR(memmove, dst, src, n);
+  MemoryAccessRange(thr, pc, (uptr)dst, n, true);  // dst is written.
+  MemoryAccessRange(thr, pc, (uptr)src, n, false);  // src is read.
+  return REAL(memmove)(dst, src, n);
+}
+
+TSAN_INTERCEPTOR(int, memcmp, const void *s1, const void *s2, uptr n) {
+  SCOPED_TSAN_INTERCEPTOR(memcmp, s1, s2, n);
+  const unsigned char *a = (const unsigned char*)s1;
+  const unsigned char *b = (const unsigned char*)s2;
+  int res = 0;
+  uptr len = 0;
+  while (len < n && (res = a[len] - b[len]) == 0)  // Stop at first mismatch.
+    len++;
+  MemoryAccessRange(thr, pc, (uptr)s1, len < n ? len + 1 : n, false);
+  MemoryAccessRange(thr, pc, (uptr)s2, len < n ? len + 1 : n, false);
+  return res;
+}
+
+TSAN_INTERCEPTOR(void*, strchr, void *s, int c) {
+  SCOPED_TSAN_INTERCEPTOR(strchr, s, c);
+  void *res = REAL(strchr)(s, c);
+  uptr len = res ? (char*)res - (char*)s + 1 : REAL(strlen)(s) + 1;
+  MemoryAccessRange(thr, pc, (uptr)s, len, false);  // Through hit or NUL.
+  return res;
+}
+
+TSAN_INTERCEPTOR(void*, strchrnul, void *s, int c) {
+  SCOPED_TSAN_INTERCEPTOR(strchrnul, s, c);
+  void *res = REAL(strchrnul)(s, c);
+  uptr len = (char*)res - (char*)s + 1;  // Up to and including *res.
+  MemoryAccessRange(thr, pc, (uptr)s, len, false);
+  return res;
+}
+
+TSAN_INTERCEPTOR(void*, strrchr, void *s, int c) {
+  SCOPED_TSAN_INTERCEPTOR(strrchr, s, c);
+  MemoryAccessRange(thr, pc, (uptr)s, REAL(strlen)(s) + 1, false);
+  return REAL(strrchr)(s, c);  // Whole string (with NUL) was marked above.
+}
+
+TSAN_INTERCEPTOR(void*, strcpy, void *dst, const void *src) {  // NOLINT
+  SCOPED_TSAN_INTERCEPTOR(strcpy, dst, src);  // NOLINT
+  uptr srclen = REAL(strlen)(src);  // Excludes the NUL, hence +1 below.
+  MemoryAccessRange(thr, pc, (uptr)dst, srclen + 1, true);
+  MemoryAccessRange(thr, pc, (uptr)src, srclen + 1, false);
+  return REAL(strcpy)(dst, src);  // NOLINT
+}
+
+TSAN_INTERCEPTOR(void*, strncpy, void *dst, void *src, uptr n) {
+  SCOPED_TSAN_INTERCEPTOR(strncpy, dst, src, n);
+  const char *nul = (const char*)REAL(memchr)(src, 0, n);  // src may lack NUL.
+  MemoryAccessRange(thr, pc, (uptr)dst, n, true);
+  MemoryAccessRange(thr, pc, (uptr)src, nul ? nul - (char*)src + 1 : n, false);
+  return REAL(strncpy)(dst, src, n);
+}
+
+TSAN_INTERCEPTOR(const char*, strstr, const char *s1, const char *s2) {
+  SCOPED_TSAN_INTERCEPTOR(strstr, s1, s2);
+  const char *res = REAL(strstr)(s1, s2);
+  uptr len1 = REAL(strlen)(s1);  // Both strings are marked in full (with NUL).
+  uptr len2 = REAL(strlen)(s2);
+  MemoryAccessRange(thr, pc, (uptr)s1, len1 + 1, false);
+  MemoryAccessRange(thr, pc, (uptr)s2, len2 + 1, false);
+  return res;
+}
+
+static bool fix_mmap_addr(void **addr, long_t sz, int flags) {
+  // No hint, or both ends of the range pass IsAppMem(): nothing to fix.
+  if (*addr == 0)
+    return true;
+  if (IsAppMem((uptr)*addr) && IsAppMem((uptr)*addr + sz - 1))
+    return true;
+  if (flags & MAP_FIXED) {  // A fixed request cannot be relocated: EINVAL.
+    *__errno_location() = EINVAL;
+    return false;
+  }
+  *addr = 0;  // Otherwise just drop the hint.
+  return true;
+}
+
+TSAN_INTERCEPTOR(void*, mmap, void *addr, long_t sz, int prot,
+                         int flags, int fd, long_t off) {
+  // off is 64-bit (off_t on LP64) so offsets >= 4GB are not truncated.
+  SCOPED_TSAN_INTERCEPTOR(mmap, addr, sz, prot, flags, fd, off);
+  if (!fix_mmap_addr(&addr, sz, flags))
+    return MAP_FAILED;
+  void *res = REAL(mmap)(addr, sz, prot, flags, fd, off);
+  if (res != MAP_FAILED)
+    MemoryResetRange(thr, pc, (uptr)res, sz);
+  return res;
+}
+
+TSAN_INTERCEPTOR(void*, mmap64, void *addr, long_t sz, int prot,
+                           int flags, int fd, u64 off) {
+  SCOPED_TSAN_INTERCEPTOR(mmap64, addr, sz, prot, flags, fd, off);
+  if (!fix_mmap_addr(&addr, sz, flags))
+    return MAP_FAILED;
+  // MemoryResetRange() is applied to every successfully mapped range.
+  void *const mapped = REAL(mmap64)(addr, sz, prot, flags, fd, off);
+  if (mapped != MAP_FAILED)
+    MemoryResetRange(thr, pc, (uptr)mapped, sz);
+  return mapped;
+}
+
+TSAN_INTERCEPTOR(int, munmap, void *addr, long_t sz) {
+  SCOPED_TSAN_INTERCEPTOR(munmap, addr, sz);
+  // Plain pass-through; nothing is recorded for the unmapped range here.
+  return REAL(munmap)(addr, sz);
+}
+
+#ifdef __LP64__
+
+// void *operator new(size_t); 'm' in the mangled name is unsigned long.
+TSAN_INTERCEPTOR(void*, _Znwm, uptr sz) {
+  SCOPED_TSAN_INTERCEPTOR(_Znwm, sz);
+  return user_alloc(thr, pc, sz);
+}
+
+// void *operator new(size_t, const std::nothrow_t&); 2nd argument is ignored.
+TSAN_INTERCEPTOR(void*, _ZnwmRKSt9nothrow_t, uptr sz) {
+  SCOPED_TSAN_INTERCEPTOR(_ZnwmRKSt9nothrow_t, sz);
+  return user_alloc(thr, pc, sz);
+}
+
+// void *operator new[](size_t); 'm' in the mangled name is unsigned long.
+TSAN_INTERCEPTOR(void*, _Znam, uptr sz) {
+  SCOPED_TSAN_INTERCEPTOR(_Znam, sz);
+  return user_alloc(thr, pc, sz);
+}
+
+// void *operator new[](size_t, const std::nothrow_t&); 2nd arg is ignored.
+TSAN_INTERCEPTOR(void*, _ZnamRKSt9nothrow_t, uptr sz) {
+  SCOPED_TSAN_INTERCEPTOR(_ZnamRKSt9nothrow_t, sz);
+  return user_alloc(thr, pc, sz);
+}
+
+#else
+#error "Not implemented"
+#endif
+
+// void operator delete(void*)
+TSAN_INTERCEPTOR(void, _ZdlPv, void *p) {
+  if (p == 0)  // Deleting null is a no-op; skip the scope and the stats.
+    return;
+  SCOPED_TSAN_INTERCEPTOR(_ZdlPv, p);
+  user_free(thr, pc, p);
+}
+
+// void operator delete(void*, const std::nothrow_t&); 2nd arg is ignored.
+TSAN_INTERCEPTOR(void, _ZdlPvRKSt9nothrow_t, void *p) {
+  if (p == 0)  // Deleting null is a no-op; skip the scope and the stats.
+    return;
+  SCOPED_TSAN_INTERCEPTOR(_ZdlPvRKSt9nothrow_t, p);
+  user_free(thr, pc, p);
+}
+
+// void operator delete[](void*)
+TSAN_INTERCEPTOR(void, _ZdaPv, void *p) {
+  if (p == 0)  // Deleting null is a no-op; skip the scope and the stats.
+    return;
+  SCOPED_TSAN_INTERCEPTOR(_ZdaPv, p);
+  user_free(thr, pc, p);
+}
+
+// void operator delete[](void*, const std::nothrow_t&); 2nd arg is ignored.
+TSAN_INTERCEPTOR(void, _ZdaPvRKSt9nothrow_t, void *p) {
+  if (p == 0)  // Deleting null is a no-op; skip the scope and the stats.
+    return;
+  SCOPED_TSAN_INTERCEPTOR(_ZdaPvRKSt9nothrow_t, p);
+  user_free(thr, pc, p);
+}
+
+TSAN_INTERCEPTOR(void*, memalign, uptr align, uptr sz) {
+  SCOPED_TSAN_INTERCEPTOR(memalign, align, sz);
+  return user_alloc_aligned(thr, pc, sz, align);  // Note: (sz, align) order.
+}
+
+TSAN_INTERCEPTOR(void*, valloc, uptr sz) {
+  SCOPED_TSAN_INTERCEPTOR(valloc, sz);
+  return user_alloc_aligned(thr, pc, sz, kPageSize);  // Page-aligned.
+}
+
+TSAN_INTERCEPTOR(void*, pvalloc, uptr sz) {
+  SCOPED_TSAN_INTERCEPTOR(pvalloc, sz);
+  sz = RoundUp(sz, kPageSize);  // Size is rounded up to whole pages.
+  return user_alloc_aligned(thr, pc, sz, kPageSize);
+}
+
+TSAN_INTERCEPTOR(int, posix_memalign, void **memptr, uptr align, uptr sz) {
+  SCOPED_TSAN_INTERCEPTOR(posix_memalign, memptr, align, sz);
+  *memptr = user_alloc_aligned(thr, pc, sz, align);
+  return 0;  // NOTE(review): align is not validated; always reports success.
+}
+
+// Used in thread-safe function static initialization.
+TSAN_INTERCEPTOR(int, __cxa_guard_acquire, char *m) {
+  SCOPED_TSAN_INTERCEPTOR(__cxa_guard_acquire, m);
+  const int res = REAL(__cxa_guard_acquire)(m);
+  // Non-zero: this thread does the init and has nothing to acquire.
+  // Zero: this thread does not do the init, so it acquires on the guard
+  // (the matching release is in __cxa_guard_release).
+  if (res == 0)
+    Acquire(thr, pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(void, __cxa_guard_release, char *m) {
+  SCOPED_TSAN_INTERCEPTOR(__cxa_guard_release, m);
+  Release(thr, pc, (uptr)m);  // Release first, then let the real call run.
+  REAL(__cxa_guard_release)(m);
+}
+
+static void thread_finalize(void *v) {
+  uptr iter = (uptr)v;  // Countdown stored as the key's value.
+  if (iter > 1) {  // Not the last pass yet: re-arm the key with iter - 1.
+    if (pthread_setspecific(g_thread_finalize_key, (void*)(iter - 1))) {
+      Printf("ThreadSanitizer: failed to set thread key\n");
+      Die();
+    }
+    return;
+  }
+  {
+    ScopedInRtl in_rtl;
+    ThreadFinish(cur_thread());
+  }
+}
+
+
+struct ThreadParam {
+  void* (*callback)(void *arg);  // User's thread entry point.
+  void *param;  // Argument passed to callback.
+  atomic_uintptr_t tid;  // Handshake slot: parent sets, child clears.
+};
+
+extern "C" void *__tsan_thread_start_func(void *arg) {
+  ThreadParam *p = (ThreadParam*)arg;
+  void* (*callback)(void *arg) = p->callback;
+  void *param = p->param;  // Copied now: *p may die after the ack below.
+  int tid = 0;
+  {
+    ThreadState *thr = cur_thread();
+    ScopedInRtl in_rtl;
+    if (pthread_setspecific(g_thread_finalize_key, (void*)4)) {
+      Printf("ThreadSanitizer: failed to set thread key\n");
+      Die();
+    }
+    while ((tid = atomic_load(&p->tid, memory_order_acquire)) == 0)
+      pthread_yield();  // Wait for the parent to publish our tid.
+    atomic_store(&p->tid, 0, memory_order_release);  // Ack to the parent.
+    ThreadStart(thr, tid);
+    CHECK_EQ(thr->in_rtl, 1);
+  }
+  void *res = callback(param);
+  // Prevent the callback from being tail called,
+  // it mixes up stack traces.
+  volatile int foo = 42;
+  foo++;
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_create,
+    void *th, void *attr, void *(*callback)(void*), void * param) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_create, th, attr, callback, param);
+  pthread_attr_t myattr;
+  if (attr == 0) {  // Substitute a default attr so it can be queried.
+    pthread_attr_init(&myattr);
+    attr = &myattr;
+  }
+  int detached = 0;
+  pthread_attr_getdetachstate(attr, &detached);
+  uptr stacksize = 0;
+  pthread_attr_getstacksize(attr, &stacksize);
+  // We place the huge ThreadState object into TLS, account for that.
+  const uptr minstacksize = GetTlsSize() + 128*1024;
+  if (stacksize < minstacksize) {
+    DPrintf("ThreadSanitizer: stacksize %lu->%lu\n", stacksize, minstacksize);
+    pthread_attr_setstacksize(attr, minstacksize);  // May edit caller's attr.
+  }
+  ThreadParam p;
+  p.callback = callback;
+  p.param = param;
+  atomic_store(&p.tid, 0, memory_order_relaxed);  // 0: tid not known yet.
+  int res = REAL(pthread_create)(th, attr, __tsan_thread_start_func, &p);
+  if (res == 0) {
+    int tid = ThreadCreate(cur_thread(), pc, *(uptr*)th, detached);
+    CHECK_NE(tid, 0);  // 0 means "not published yet" to the child.
+    atomic_store(&p.tid, tid, memory_order_release);
+    while (atomic_load(&p.tid, memory_order_acquire) != 0)
+      pthread_yield();  // p is on this stack: wait for the child's ack.
+  }
+  if (attr == &myattr)
+    pthread_attr_destroy(&myattr);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_join, void *th, void **ret) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_join, th, ret);
+  // Look the tid up before joining; report the join only if it succeeded.
+  const int tid = ThreadTid(thr, pc, (uptr)th);
+  const int res = REAL(pthread_join)(th, ret);
+  if (res == 0)
+    ThreadJoin(cur_thread(), pc, tid);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_detach, void *th) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_detach, th);
+  // Look the tid up first; report the detach only if it succeeded.
+  const int tid = ThreadTid(thr, pc, (uptr)th);
+  const int res = REAL(pthread_detach)(th);
+  if (res == 0)
+    ThreadDetach(cur_thread(), pc, tid);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_mutex_init, void *m, void *a) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_init, m, a);
+  const int res = REAL(pthread_mutex_init)(m, a);
+  if (res != 0)
+    return res;
+  // The mutex counts as recursive only if an attribute object says so.
+  int type = 0;
+  bool recursive = false;
+  if (a && pthread_mutexattr_gettype(a, &type) == 0) {
+    recursive = type == PTHREAD_MUTEX_RECURSIVE
+        || type == PTHREAD_MUTEX_RECURSIVE_NP;
+  }
+  MutexCreate(cur_thread(), pc, (uptr)m, false, recursive);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_mutex_destroy, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_destroy, m);
+  // EBUSY is handled like success: MutexDestroy() is still called.
+  const int res = REAL(pthread_mutex_destroy)(m);
+  if (res == 0 || res == EBUSY)
+    MutexDestroy(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_mutex_lock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_lock, m);
+  // Report the lock to the runtime only if the real call succeeded.
+  const int res = REAL(pthread_mutex_lock)(m);
+  if (res == 0)
+    MutexLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_mutex_trylock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_trylock, m);
+  // A failed attempt (non-zero result) is not reported to the runtime.
+  const int res = REAL(pthread_mutex_trylock)(m);
+  if (res == 0)
+    MutexLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_mutex_timedlock, void *m, void *abstime) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_timedlock, m, abstime);
+  // Any non-zero result means the lock was not taken: report nothing.
+  const int res = REAL(pthread_mutex_timedlock)(m, abstime);
+  if (res == 0)
+    MutexLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_mutex_unlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_unlock, m);
+  // The runtime is told about the unlock before the real call is made.
+  MutexUnlock(cur_thread(), pc, (uptr)m);
+  return REAL(pthread_mutex_unlock)(m);
+}
+
+TSAN_INTERCEPTOR(int, pthread_spin_init, void *m, int pshared) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_init, m, pshared);
+  // Spin locks reuse the mutex model; both MutexCreate flags are false.
+  const int res = REAL(pthread_spin_init)(m, pshared);
+  if (res == 0)
+    MutexCreate(cur_thread(), pc, (uptr)m, false, false);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_spin_destroy, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_destroy, m);
+  // Drop the runtime's record only if the real destroy succeeded.
+  const int res = REAL(pthread_spin_destroy)(m);
+  if (res == 0)
+    MutexDestroy(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_spin_lock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_lock, m);
+  // Report the lock to the runtime only if the real call succeeded.
+  const int res = REAL(pthread_spin_lock)(m);
+  if (res == 0)
+    MutexLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_spin_trylock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_trylock, m);
+  // A failed attempt (non-zero result) is not reported to the runtime.
+  const int res = REAL(pthread_spin_trylock)(m);
+  if (res == 0)
+    MutexLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_spin_unlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_unlock, m);
+  // The runtime is told about the unlock before the real call is made.
+  MutexUnlock(cur_thread(), pc, (uptr)m);
+  return REAL(pthread_spin_unlock)(m);
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_init, void *m, void *a) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_init, m, a);
+  // NOTE(review): the 'true' presumably marks a reader-writer mutex.
+  const int res = REAL(pthread_rwlock_init)(m, a);
+  if (res == 0)
+    MutexCreate(cur_thread(), pc, (uptr)m, true, false);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_destroy, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_destroy, m);
+  // Drop the runtime's record only if the real destroy succeeded.
+  const int res = REAL(pthread_rwlock_destroy)(m);
+  if (res == 0)
+    MutexDestroy(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_rdlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_rdlock, m);
+  // Report a read lock, and only if the real call succeeded.
+  const int res = REAL(pthread_rwlock_rdlock)(m);
+  if (res == 0)
+    MutexReadLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_tryrdlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_tryrdlock, m);
+  // A failed attempt (non-zero result) is not reported to the runtime.
+  const int res = REAL(pthread_rwlock_tryrdlock)(m);
+  if (res == 0)
+    MutexReadLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_timedrdlock, void *m, void *abstime) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedrdlock, m, abstime);
+  // Any non-zero result means the lock was not taken: report nothing.
+  const int res = REAL(pthread_rwlock_timedrdlock)(m, abstime);
+  if (res == 0)
+    MutexReadLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_wrlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_wrlock, m);
+  // A write lock is reported through MutexLock(), like a plain mutex.
+  const int res = REAL(pthread_rwlock_wrlock)(m);
+  if (res == 0)
+    MutexLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_trywrlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_trywrlock, m);
+  // A failed attempt (non-zero result) is not reported to the runtime.
+  const int res = REAL(pthread_rwlock_trywrlock)(m);
+  if (res == 0)
+    MutexLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_timedwrlock, void *m, void *abstime) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedwrlock, m, abstime);
+  // Any non-zero result means the lock was not taken: report nothing.
+  const int res = REAL(pthread_rwlock_timedwrlock)(m, abstime);
+  if (res == 0)
+    MutexLock(cur_thread(), pc, (uptr)m);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_unlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_unlock, m);
+  // One entry point serves both read and write unlocks.
+  MutexReadOrWriteUnlock(cur_thread(), pc, (uptr)m);
+  return REAL(pthread_rwlock_unlock)(m);
+}
+
+TSAN_INTERCEPTOR(int, pthread_cond_init, void *c, void *a) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_init, c, a);
+  // Pure pass-through: nothing is recorded for the condition variable.
+  return REAL(pthread_cond_init)(c, a);
+}
+
+TSAN_INTERCEPTOR(int, pthread_cond_destroy, void *c) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_destroy, c);
+  // Pure pass-through: nothing is recorded for the condition variable.
+  return REAL(pthread_cond_destroy)(c);
+}
+
+TSAN_INTERCEPTOR(int, pthread_cond_signal, void *c) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_signal, c);
+  // Pure pass-through: signalling itself is not modeled as synchronization.
+  return REAL(pthread_cond_signal)(c);
+}
+
+TSAN_INTERCEPTOR(int, pthread_cond_broadcast, void *c) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_broadcast, c);
+  // Pure pass-through: broadcasting is not modeled as synchronization.
+  return REAL(pthread_cond_broadcast)(c);
+}
+
+TSAN_INTERCEPTOR(int, pthread_cond_wait, void *c, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_wait, c, m);
+  MutexUnlock(cur_thread(), pc, (uptr)m);  // The wait gives up m ...
+  int res = REAL(pthread_cond_wait)(c, m);
+  MutexLock(cur_thread(), pc, (uptr)m);  // ... re-locked whatever res is.
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_cond_timedwait, void *c, void *m, void *abstime) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_timedwait, c, m, abstime);
+  MutexUnlock(cur_thread(), pc, (uptr)m);  // The wait gives up m ...
+  int res = REAL(pthread_cond_timedwait)(c, m, abstime);
+  MutexLock(cur_thread(), pc, (uptr)m);  // ... re-locked whatever res is.
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_barrier_init, void *b, void *a, unsigned count) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_barrier_init, b, a, count);
+  // Initialization is recorded as a one-byte write to the barrier object.
+  MemoryWrite1Byte(thr, pc, (uptr)b);
+  return REAL(pthread_barrier_init)(b, a, count);
+}
+
+TSAN_INTERCEPTOR(int, pthread_barrier_destroy, void *b) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_barrier_destroy, b);
+  // Destruction is recorded as a one-byte write to the barrier object.
+  MemoryWrite1Byte(thr, pc, (uptr)b);
+  return REAL(pthread_barrier_destroy)(b);
+}
+
+TSAN_INTERCEPTOR(int, pthread_barrier_wait, void *b) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_barrier_wait, b);
+  // Release before blocking and acquire after a successful wait.
+  Release(cur_thread(), pc, (uptr)b);
+  MemoryRead1Byte(thr, pc, (uptr)b);
+  const int res = REAL(pthread_barrier_wait)(b);
+  MemoryRead1Byte(thr, pc, (uptr)b);
+  if (res == 0 || res == PTHREAD_BARRIER_SERIAL_THREAD)
+    Acquire(cur_thread(), pc, (uptr)b);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_once, o, f);
+  if (o == 0 || f == 0)
+    return EINVAL;
+  atomic_uint32_t *a = static_cast<atomic_uint32_t*>(o);
+  u32 v = atomic_load(a, memory_order_acquire);  // 0 new, 1 running, 2 done.
+  if (v == 0 && atomic_compare_exchange_strong(a, &v, 1,
+                                               memory_order_relaxed)) {
+    const int old_in_rtl = thr->in_rtl;
+    thr->in_rtl = 0;  // f runs as user code, outside the runtime.
+    (*f)();
+    CHECK_EQ(thr->in_rtl, 0);
+    thr->in_rtl = old_in_rtl;
+    Release(cur_thread(), pc, (uptr)o);
+    atomic_store(a, 2, memory_order_release);  // Publish completion.
+  } else {
+    while (v != 2) {  // Another thread is (or was) running f.
+      pthread_yield();
+      v = atomic_load(a, memory_order_acquire);
+    }
+    Acquire(cur_thread(), pc, (uptr)o);
+  }
+  return 0;
+}
+
+TSAN_INTERCEPTOR(int, sem_init, void *s, int pshared, unsigned value) {
+  SCOPED_TSAN_INTERCEPTOR(sem_init, s, pshared, value);
+  // Pure pass-through: nothing is recorded when a semaphore is created.
+  return REAL(sem_init)(s, pshared, value);
+}
+
+TSAN_INTERCEPTOR(int, sem_destroy, void *s) {
+  SCOPED_TSAN_INTERCEPTOR(sem_destroy, s);
+  // Pure pass-through: nothing is recorded when a semaphore is destroyed.
+  return REAL(sem_destroy)(s);
+}
+
+TSAN_INTERCEPTOR(int, sem_wait, void *s) {
+  SCOPED_TSAN_INTERCEPTOR(sem_wait, s);
+  // A successful wait acquires on the semaphore (sem_post releases on it).
+  const int res = REAL(sem_wait)(s);
+  if (res == 0)
+    Acquire(cur_thread(), pc, (uptr)s);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, sem_trywait, void *s) {
+  SCOPED_TSAN_INTERCEPTOR(sem_trywait, s);
+  // Only a successful attempt acquires on the semaphore.
+  const int res = REAL(sem_trywait)(s);
+  if (res == 0)
+    Acquire(cur_thread(), pc, (uptr)s);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, sem_timedwait, void *s, void *abstime) {
+  SCOPED_TSAN_INTERCEPTOR(sem_timedwait, s, abstime);
+  // Any non-zero result means no acquisition happened.
+  const int res = REAL(sem_timedwait)(s, abstime);
+  if (res == 0)
+    Acquire(cur_thread(), pc, (uptr)s);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, sem_post, void *s) {
+  SCOPED_TSAN_INTERCEPTOR(sem_post, s);
+  // Release on the semaphore before the real post wakes any waiter.
+  Release(cur_thread(), pc, (uptr)s);
+  return REAL(sem_post)(s);
+}
+
+TSAN_INTERCEPTOR(int, sem_getvalue, void *s, int *sval) {
+  SCOPED_TSAN_INTERCEPTOR(sem_getvalue, s, sval);
+  // A successful read of the value also acquires on the semaphore.
+  const int res = REAL(sem_getvalue)(s, sval);
+  if (res == 0)
+    Acquire(cur_thread(), pc, (uptr)s);
+  return res;
+}
+
+TSAN_INTERCEPTOR(long_t, read, int fd, void *buf, long_t sz) {
+  SCOPED_TSAN_INTERCEPTOR(read, fd, buf, sz);
+  // Keep the full-width result; an int would narrow the byte count.
+  long_t res = REAL(read)(fd, buf, sz);
+  if (res >= 0)
+    Acquire(cur_thread(), pc, fd2addr(fd));
+  return res;
+}
+
+TSAN_INTERCEPTOR(long_t, pread, int fd, void *buf, long_t sz, long_t off) {
+  SCOPED_TSAN_INTERCEPTOR(pread, fd, buf, sz, off);
+  // off and res are kept at full width (64-bit off_t/ssize_t on LP64).
+  long_t res = REAL(pread)(fd, buf, sz, off);
+  if (res >= 0)
+    Acquire(cur_thread(), pc, fd2addr(fd));
+  return res;
+}
+
+TSAN_INTERCEPTOR(long_t, pread64, int fd, void *buf, long_t sz, u64 off) {
+  SCOPED_TSAN_INTERCEPTOR(pread64, fd, buf, sz, off);
+  // Keep the full-width result; an int would narrow the byte count.
+  long_t res = REAL(pread64)(fd, buf, sz, off);
+  if (res >= 0)
+    Acquire(cur_thread(), pc, fd2addr(fd));
+  return res;
+}
+
+TSAN_INTERCEPTOR(long_t, readv, int fd, void *vec, int cnt) {
+  SCOPED_TSAN_INTERCEPTOR(readv, fd, vec, cnt);
+  // Keep the full-width result; an int would narrow the byte count.
+  long_t res = REAL(readv)(fd, vec, cnt);
+  if (res >= 0)
+    Acquire(cur_thread(), pc, fd2addr(fd));
+  return res;
+}
+
+TSAN_INTERCEPTOR(long_t, preadv64, int fd, void *vec, int cnt, u64 off) {
+  SCOPED_TSAN_INTERCEPTOR(preadv64, fd, vec, cnt, off);
+  // Keep the full-width result; an int would narrow the byte count.
+  long_t res = REAL(preadv64)(fd, vec, cnt, off);
+  if (res >= 0)
+    Acquire(cur_thread(), pc, fd2addr(fd));
+  return res;
+}
+
+TSAN_INTERCEPTOR(long_t, write, int fd, void *buf, long_t sz) {
+  SCOPED_TSAN_INTERCEPTOR(write, fd, buf, sz);
+  Release(cur_thread(), pc, fd2addr(fd));
+  // Return the result directly: routing it through an int narrowed it.
+  return REAL(write)(fd, buf, sz);
+}
+
+TSAN_INTERCEPTOR(long_t, pwrite, int fd, void *buf, long_t sz, long_t off) {
+  SCOPED_TSAN_INTERCEPTOR(pwrite, fd, buf, sz, off);
+  Release(cur_thread(), pc, fd2addr(fd));
+  // off is a 64-bit off_t on LP64; the result is returned unnarrowed.
+  return REAL(pwrite)(fd, buf, sz, off);
+}
+
+TSAN_INTERCEPTOR(long_t, pwrite64, int fd, void *buf, long_t sz, u64 off) {
+  SCOPED_TSAN_INTERCEPTOR(pwrite64, fd, buf, sz, off);
+  Release(cur_thread(), pc, fd2addr(fd));
+  // off is 64-bit, matching pread64; the result is returned unnarrowed.
+  return REAL(pwrite64)(fd, buf, sz, off);
+}
+
+TSAN_INTERCEPTOR(long_t, writev, int fd, void *vec, int cnt) {
+  SCOPED_TSAN_INTERCEPTOR(writev, fd, vec, cnt);
+  Release(cur_thread(), pc, fd2addr(fd));
+  // Return the result directly: routing it through an int narrowed it.
+  return REAL(writev)(fd, vec, cnt);
+}
+
+TSAN_INTERCEPTOR(long_t, pwritev64, int fd, void *vec, int cnt, u64 off) {
+  SCOPED_TSAN_INTERCEPTOR(pwritev64, fd, vec, cnt, off);
+  Release(cur_thread(), pc, fd2addr(fd));
+  // Return the result directly: routing it through an int narrowed it.
+  return REAL(pwritev64)(fd, vec, cnt, off);
+}
+
+TSAN_INTERCEPTOR(long_t, send, int fd, void *buf, long_t len, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(send, fd, buf, len, flags);
+  Release(cur_thread(), pc, fd2addr(fd));
+  // Return the result directly: routing it through an int narrowed it.
+  return REAL(send)(fd, buf, len, flags);
+}
+
+TSAN_INTERCEPTOR(long_t, sendmsg, int fd, void *msg, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(sendmsg, fd, msg, flags);
+  Release(cur_thread(), pc, fd2addr(fd));
+  // Return the result directly: routing it through an int narrowed it.
+  return REAL(sendmsg)(fd, msg, flags);
+}
+
+TSAN_INTERCEPTOR(long_t, recv, int fd, void *buf, long_t len, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(recv, fd, buf, len, flags);
+  // Keep the full-width result; an int would narrow the byte count.
+  long_t res = REAL(recv)(fd, buf, len, flags);
+  if (res >= 0)
+    Acquire(cur_thread(), pc, fd2addr(fd));
+  return res;
+}
+
+TSAN_INTERCEPTOR(long_t, recvmsg, int fd, void *msg, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(recvmsg, fd, msg, flags);
+  // Keep the full-width result; an int would narrow the byte count.
+  long_t res = REAL(recvmsg)(fd, msg, flags);
+  if (res >= 0)
+    Acquire(cur_thread(), pc, fd2addr(fd));
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, unlink, char *path) {
+  SCOPED_TSAN_INTERCEPTOR(unlink, path);
+  // Release on the file address before removing; fopen acquires on it.
+  Release(cur_thread(), pc, file2addr(path));
+  return REAL(unlink)(path);
+}
+
+TSAN_INTERCEPTOR(void*, fopen, char *path, char *mode) {
+  SCOPED_TSAN_INTERCEPTOR(fopen, path, mode);
+  void *res = REAL(fopen)(path, mode);
+  Acquire(cur_thread(), pc, file2addr(path));  // Even if fopen failed.
+  return res;
+}
+
+TSAN_INTERCEPTOR(uptr, fread, void *ptr, uptr size, uptr nmemb, void *f) {
+  SCOPED_TSAN_INTERCEPTOR(fread, ptr, size, nmemb, f);
+  MemoryAccessRange(thr, pc, (uptr)ptr, size * nmemb, true);  // ptr: write.
+  return REAL(fread)(ptr, size, nmemb, f);
+}
+
+TSAN_INTERCEPTOR(uptr, fwrite, const void *p, uptr size, uptr nmemb, void *f) {
+  SCOPED_TSAN_INTERCEPTOR(fwrite, p, size, nmemb, f);
+  MemoryAccessRange(thr, pc, (uptr)p, size * nmemb, false);  // p is read.
+  return REAL(fwrite)(p, size, nmemb, f);
+}
+
+TSAN_INTERCEPTOR(int, puts, const char *s) {
+  SCOPED_TSAN_INTERCEPTOR(puts, s);
+  MemoryAccessRange(thr, pc, (uptr)s, REAL(strlen)(s), false);  // Covers the string's characters; the terminating NUL is not included.
+  return REAL(puts)(s);
+}
+
+TSAN_INTERCEPTOR(int, rmdir, char *path) {
+  SCOPED_TSAN_INTERCEPTOR(rmdir, path);
+  // Release on the address dir2addr() yields for path, then really rmdir.
+  Release(cur_thread(), pc, dir2addr(path));
+  return REAL(rmdir)(path);
+}
+
+TSAN_INTERCEPTOR(void*, opendir, char *path) {
+  SCOPED_TSAN_INTERCEPTOR(opendir, path);
+  void *const dir = REAL(opendir)(path);
+  Acquire(cur_thread(), pc, dir2addr(path));  // Performed whether or not the open succeeded.
+  return dir;
+}
+
+TSAN_INTERCEPTOR(int, epoll_ctl, int epfd, int op, int fd, void *ev) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_ctl, epfd, op, fd, ev);
+  // Only EPOLL_CTL_ADD releases on the epoll object's sync address; every
+  // other operation is forwarded without extra synchronization.
+  const bool adding = (op == EPOLL_CTL_ADD);
+  if (adding) Release(cur_thread(), pc, epollfd2addr(epfd));
+  return REAL(epoll_ctl)(epfd, op, fd, ev);
+}
+
+TSAN_INTERCEPTOR(int, epoll_wait, int epfd, void *ev, int cnt, int timeout) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_wait, epfd, ev, cnt, timeout);
+  // Acquire on the epoll object's sync address only when the real call
+  // returned a positive count; results of zero or below skip it.
+  const int nready = REAL(epoll_wait)(epfd, ev, cnt, timeout);
+  if (nready > 0) Acquire(cur_thread(), pc, epollfd2addr(epfd));
+  return nready;
+}
+
+static void rtl_sighandler(int sig) {
+  // Only records the signal as pending; an already-armed slot is left alone.
+  ThreadState *const self = cur_thread();
+  SignalDesc *const slot = &self->pending_signals[sig];
+  if (slot->armed) return;
+  slot->armed = true;
+  slot->sigaction = false;  // Delivered later through sa_handler.
+  self->pending_signal_count++;
+}
+
+static void rtl_sigaction(int sig, my_siginfo_t *info, void *ctx) {
+  // Like rtl_sighandler, but also snapshots *info for the deferred delivery.
+  ThreadState *const self = cur_thread();
+  SignalDesc *const slot = &self->pending_signals[sig];
+  if (slot->armed) return;
+  slot->armed = true;
+  slot->sigaction = true;  // Delivered later through sa_sigaction.
+  slot->siginfo = *info;
+  self->pending_signal_count++;
+}
+
+TSAN_INTERCEPTOR(int, sigaction, int sig, sigaction_t *act, sigaction_t *old) {
+  SCOPED_TSAN_INTERCEPTOR(sigaction, sig, act, old);
+  // Queries, SIG_IGN/SIG_DFL and signal numbers outside [0, kSigCount) go
+  // straight to the real sigaction(). Out-of-range numbers previously indexed
+  // sigactions[] before libc had any chance to reject them.
+  if (act == 0 || sig < 0 || sig >= kSigCount ||
+      act->sa_handler == SIG_IGN || act->sa_handler == SIG_DFL)
+    return REAL(sigaction)(sig, act, old);
+  sigactions[sig] = *act;  // Remembered so process_pending_signals() can call the user handler.
+  sigaction_t newact = *act;
+  if (newact.sa_flags & SA_SIGINFO)
+    newact.sa_sigaction = rtl_sigaction;  // Three-argument recorder.
+  else
+    newact.sa_handler = rtl_sighandler;  // One-argument recorder.
+  return REAL(sigaction)(sig, &newact, old);
+}
+
+static void process_pending_signals(ThreadState *thr) {  // Runs the user handlers for signals recorded by rtl_sighandler/rtl_sigaction.
+  CHECK_EQ(thr->in_rtl, 0);
+  if (thr->pending_signal_count == 0 || thr->in_signal_handler)  // Nothing recorded, or already delivering on this thread.
+    return;
+  thr->in_signal_handler = true;
+  thr->pending_signal_count = 0;
+  // These are too big for stack.
+  static THREADLOCAL ucontext_t uctx;
+  static THREADLOCAL sigset_t emptyset, oldset;
+  getcontext(&uctx);  // Context handed to sa_sigaction handlers; captured here, not at signal arrival.
+  sigfillset(&emptyset);  // NOTE: despite its name the set is filled, so every signal is blocked below.
+  pthread_sigmask(SIG_SETMASK, &emptyset, &oldset);
+  for (int sig = 0; sig < kSigCount; sig++) {
+    SignalDesc *signal = &thr->pending_signals[sig];
+    if (signal->armed) {
+      signal->armed = false;  // Disarmed before the handler runs.
+      if (signal->sigaction)
+        sigactions[sig].sa_sigaction(sig, &signal->siginfo, &uctx);
+      else
+        sigactions[sig].sa_handler(sig);
+    }
+  }
+  pthread_sigmask(SIG_SETMASK, &oldset, 0);  // Restore the mask that was in effect on entry.
+  CHECK_EQ(thr->in_signal_handler, true);
+  thr->in_signal_handler = false;
+}
+
+namespace __tsan {
+
+// Used until we obtain real efficient functions.
+static void* poormans_memset(void *dst, int v, uptr size) {
+  char *out = (char*)dst;  // Plain byte loop that calls nothing else.
+  for (char *const end = out + size; out != end; ++out) *out = (char)v;
+  return dst;
+}
+
+static void* poormans_memcpy(void *dst, const void *src, uptr size) {
+  char *out = (char*)dst;  // Forward, byte-by-byte copy.
+  for (const char *in = (const char*)src; size != 0; --size) *out++ = *in++;
+  return dst;
+}
+
+void InitializeInterceptors() {  // Registers every interceptor, then sets up the atexit context and the thread-finalize key.
+  CHECK_GT(cur_thread()->in_rtl, 0);
+
+  // We need to setup it early, because functions like dlsym() can call it.
+  REAL(memset) = poormans_memset;
+  REAL(memcpy) = poormans_memcpy;
+
+  TSAN_INTERCEPT(malloc);
+  TSAN_INTERCEPT(calloc);
+  TSAN_INTERCEPT(realloc);
+  TSAN_INTERCEPT(free);
+  TSAN_INTERCEPT(cfree);
+  TSAN_INTERCEPT(mmap);
+  TSAN_INTERCEPT(mmap64);
+  TSAN_INTERCEPT(munmap);
+  TSAN_INTERCEPT(memalign);
+  TSAN_INTERCEPT(valloc);
+  TSAN_INTERCEPT(pvalloc);
+  TSAN_INTERCEPT(posix_memalign);
+
+  TSAN_INTERCEPT(_Znwm);  // Mangled names of the operator new/delete family.
+  TSAN_INTERCEPT(_ZnwmRKSt9nothrow_t);
+  TSAN_INTERCEPT(_Znam);
+  TSAN_INTERCEPT(_ZnamRKSt9nothrow_t);
+  TSAN_INTERCEPT(_ZdlPv);
+  TSAN_INTERCEPT(_ZdlPvRKSt9nothrow_t);
+  TSAN_INTERCEPT(_ZdaPv);
+  TSAN_INTERCEPT(_ZdaPvRKSt9nothrow_t);
+
+  TSAN_INTERCEPT(strlen);
+  TSAN_INTERCEPT(memset);
+  TSAN_INTERCEPT(memcpy);
+  TSAN_INTERCEPT(strcmp);
+  TSAN_INTERCEPT(memchr);
+  TSAN_INTERCEPT(memrchr);
+  TSAN_INTERCEPT(memmove);
+  TSAN_INTERCEPT(memcmp);
+  TSAN_INTERCEPT(strchr);
+  TSAN_INTERCEPT(strchrnul);
+  TSAN_INTERCEPT(strrchr);
+  TSAN_INTERCEPT(strncmp);
+  TSAN_INTERCEPT(strcpy);  // NOLINT
+  TSAN_INTERCEPT(strncpy);
+  TSAN_INTERCEPT(strstr);
+
+  TSAN_INTERCEPT(__cxa_guard_acquire);
+  TSAN_INTERCEPT(__cxa_guard_release);
+
+  TSAN_INTERCEPT(pthread_create);
+  TSAN_INTERCEPT(pthread_join);
+  TSAN_INTERCEPT(pthread_detach);
+
+  TSAN_INTERCEPT(pthread_mutex_init);
+  TSAN_INTERCEPT(pthread_mutex_destroy);
+  TSAN_INTERCEPT(pthread_mutex_lock);
+  TSAN_INTERCEPT(pthread_mutex_trylock);
+  TSAN_INTERCEPT(pthread_mutex_timedlock);
+  TSAN_INTERCEPT(pthread_mutex_unlock);
+
+  TSAN_INTERCEPT(pthread_spin_init);
+  TSAN_INTERCEPT(pthread_spin_destroy);
+  TSAN_INTERCEPT(pthread_spin_lock);
+  TSAN_INTERCEPT(pthread_spin_trylock);
+  TSAN_INTERCEPT(pthread_spin_unlock);
+
+  TSAN_INTERCEPT(pthread_rwlock_init);
+  TSAN_INTERCEPT(pthread_rwlock_destroy);
+  TSAN_INTERCEPT(pthread_rwlock_rdlock);
+  TSAN_INTERCEPT(pthread_rwlock_tryrdlock);
+  TSAN_INTERCEPT(pthread_rwlock_timedrdlock);
+  TSAN_INTERCEPT(pthread_rwlock_wrlock);
+  TSAN_INTERCEPT(pthread_rwlock_trywrlock);
+  TSAN_INTERCEPT(pthread_rwlock_timedwrlock);
+  TSAN_INTERCEPT(pthread_rwlock_unlock);
+
+  TSAN_INTERCEPT(pthread_cond_init);
+  TSAN_INTERCEPT(pthread_cond_destroy);
+  TSAN_INTERCEPT(pthread_cond_signal);
+  TSAN_INTERCEPT(pthread_cond_broadcast);
+  TSAN_INTERCEPT(pthread_cond_wait);
+  TSAN_INTERCEPT(pthread_cond_timedwait);
+
+  TSAN_INTERCEPT(pthread_barrier_init);
+  TSAN_INTERCEPT(pthread_barrier_destroy);
+  TSAN_INTERCEPT(pthread_barrier_wait);
+
+  TSAN_INTERCEPT(pthread_once);
+
+  TSAN_INTERCEPT(sem_init);
+  TSAN_INTERCEPT(sem_destroy);
+  TSAN_INTERCEPT(sem_wait);
+  TSAN_INTERCEPT(sem_trywait);
+  TSAN_INTERCEPT(sem_timedwait);
+  TSAN_INTERCEPT(sem_post);
+  TSAN_INTERCEPT(sem_getvalue);
+
+  TSAN_INTERCEPT(read);
+  TSAN_INTERCEPT(pread);
+  TSAN_INTERCEPT(pread64);
+  TSAN_INTERCEPT(readv);
+  TSAN_INTERCEPT(preadv64);
+  TSAN_INTERCEPT(write);
+  TSAN_INTERCEPT(pwrite);
+  TSAN_INTERCEPT(pwrite64);
+  TSAN_INTERCEPT(writev);
+  TSAN_INTERCEPT(pwritev64);
+  TSAN_INTERCEPT(send);
+  TSAN_INTERCEPT(sendmsg);
+  TSAN_INTERCEPT(recv);
+  TSAN_INTERCEPT(recvmsg);
+
+  TSAN_INTERCEPT(unlink);
+  TSAN_INTERCEPT(fopen);
+  TSAN_INTERCEPT(fread);
+  TSAN_INTERCEPT(fwrite);
+  TSAN_INTERCEPT(puts);
+  TSAN_INTERCEPT(rmdir);
+  TSAN_INTERCEPT(opendir);
+
+  TSAN_INTERCEPT(epoll_ctl);
+  TSAN_INTERCEPT(epoll_wait);
+
+  TSAN_INTERCEPT(sigaction);
+
+  atexit_ctx = new(internal_alloc(MBlockAtExit, sizeof(AtExitContext)))  // Placement-new into memory from internal_alloc.
+      AtExitContext();
+
+  if (__cxa_atexit(&finalize, 0, 0)) {  // A non-zero result is fatal.
+    Printf("ThreadSanitizer: failed to setup atexit callback\n");
+    Die();
+  }
+
+  if (pthread_key_create(&g_thread_finalize_key, &thread_finalize)) {  // A non-zero result is fatal.
+    Printf("ThreadSanitizer: failed to create thread key\n");
+    Die();
+  }
+}
+
+void internal_memset(void *ptr, int c, uptr size) {  // The internal_* helpers below forward to the REAL() libc functions.
+  REAL(memset)(ptr, c, size);
+}
+
+void internal_memcpy(void *dst, const void *src, uptr size) {
+  REAL(memcpy)(dst, src, size);
+}
+
+int internal_memcmp(const void *s1, const void *s2, uptr size) {
+  return REAL(memcmp)(s1, s2, size);
+}
+
+int internal_strcmp(const char *s1, const char *s2) {
+  return REAL(strcmp)(s1, s2);
+}
+
+int internal_strncmp(const char *s1, const char *s2, uptr size) {
+  return REAL(strncmp)(s1, s2, size);
+}
+
+void internal_strcpy(char *s1, const char *s2) {
+  REAL(strcpy)(s1, s2);  // NOLINT
+}
+
+uptr internal_strlen(const char *s) {
+  return REAL(strlen)(s);
+}
+
+char* internal_strdup(const char *s) {  // Copy lives in internal_alloc(MBlockString, ...) memory.
+  uptr len = internal_strlen(s);
+  char *s2 = (char*)internal_alloc(MBlockString, len + 1);  // +1 for the terminating NUL.
+  internal_memcpy(s2, s, len);
+  s2[len] = 0;
+  return s2;
+}
+
+const char *internal_strstr(const char *where, const char *what) {
+  return REAL(strstr)(where, what);
+}
+
+const char *internal_strchr(const char *where, char what) {
+  return (const char*)REAL(strchr)((void*)where, what);  // Casts adapt to the interceptor's signature; constness is restored on return.
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_interface.cc b/lib/tsan/rtl/tsan_interface.cc
new file mode 100644
index 0000000..b7b0085
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interface.cc
@@ -0,0 +1,42 @@
+//===-- tsan_interface.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_interface.h"
+#include "tsan_interface_ann.h"
+#include "tsan_rtl.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;  // NOLINT
+
+void __tsan_init() {  // Runs Initialize() for the calling thread.
+  Initialize(cur_thread());
+}
+
+void __tsan_read16(void *addr) {  // Reported as two adjacent 8-byte reads.
+  MemoryRead8Byte(cur_thread(), CALLERPC, (uptr)addr);
+  MemoryRead8Byte(cur_thread(), CALLERPC, (uptr)addr + 8);
+}
+
+void __tsan_write16(void *addr) {  // Reported as two adjacent 8-byte writes.
+  MemoryWrite8Byte(cur_thread(), CALLERPC, (uptr)addr);
+  MemoryWrite8Byte(cur_thread(), CALLERPC, (uptr)addr + 8);
+}
+
+void __tsan_acquire(void *addr) {  // Acquire on addr, attributed to the caller's return address.
+  Acquire(cur_thread(), CALLERPC, (uptr)addr);
+}
+
+void __tsan_release(void *addr) {  // Release on addr, attributed to the caller's return address.
+  Release(cur_thread(), CALLERPC, (uptr)addr);
+}
diff --git a/lib/tsan/rtl/tsan_interface.h b/lib/tsan/rtl/tsan_interface.h
new file mode 100644
index 0000000..ed21ec6
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interface.h
@@ -0,0 +1,51 @@
+//===-- tsan_interface.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// The functions declared in this header will be inserted by the instrumentation
+// module.
+// This header can be included by the instrumented program or by TSan tests.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_H
+#define TSAN_INTERFACE_H
+
+// This header should NOT include any other headers.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// This function should be called at the very beginning of the process,
+// before any instrumented code is executed and before any call to malloc.
+void __tsan_init();
+
+void __tsan_read1(void *addr);  // __tsan_readN: report an N-byte read at addr.
+void __tsan_read2(void *addr);
+void __tsan_read4(void *addr);
+void __tsan_read8(void *addr);
+void __tsan_read16(void *addr);
+
+void __tsan_write1(void *addr);  // __tsan_writeN: report an N-byte write at addr.
+void __tsan_write2(void *addr);
+void __tsan_write4(void *addr);
+void __tsan_write8(void *addr);
+void __tsan_write16(void *addr);
+
+void __tsan_vptr_update(void **vptr_p, void *new_val);  // Reports a write to *vptr_p only when new_val differs from the current value.
+
+void __tsan_func_entry(void *call_pc);  // Function entry/exit notifications.
+void __tsan_func_exit();
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TSAN_INTERFACE_H
diff --git a/lib/tsan/rtl/tsan_interface_ann.cc b/lib/tsan/rtl/tsan_interface_ann.cc
new file mode 100644
index 0000000..96f1f8a
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interface_ann.cc
@@ -0,0 +1,344 @@
+//===-- tsan_interface_ann.cc -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_interface_ann.h"
+#include "tsan_mutex.h"
+#include "tsan_placement_new.h"
+#include "tsan_report.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "tsan_flags.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;  // NOLINT
+
+namespace __tsan {
+
+class ScopedAnnotation {  // Scope guard used by SCOPED_ANNOTATION: FuncEntry + in_rtl++ on entry, the reverse on exit.
+ public:
+  ScopedAnnotation(ThreadState *thr, const char *aname, const char *f, int l,
+                   uptr pc)
+      : thr_(thr)
+      , in_rtl_(thr->in_rtl) {  // Remembered so the destructor can verify the counter was restored.
+    CHECK_EQ(thr_->in_rtl, 0);  // Annotations must be entered from outside the runtime.
+    FuncEntry(thr_, pc);
+    thr_->in_rtl++;
+    DPrintf("#%d: annotation %s() %s:%d\n", thr_->tid, aname, f, l);
+  }
+
+  ~ScopedAnnotation() {
+    thr_->in_rtl--;
+    CHECK_EQ(in_rtl_, thr_->in_rtl);
+    FuncExit(thr_);
+  }
+ private:
+  ThreadState *const thr_;
+  const int in_rtl_;  // Value of thr->in_rtl at construction.
+};
+
+#define SCOPED_ANNOTATION(typ) \
+    if (!flags()->enable_annotations) \
+      return; \
+    ThreadState *thr = cur_thread(); \
+    StatInc(thr, StatAnnotation); \
+    StatInc(thr, Stat##typ); \
+    ScopedAnnotation sa(thr, __FUNCTION__, f, l, \
+        (uptr)__builtin_return_address(0)); \
+    const uptr pc = (uptr)&__FUNCTION__; \
+    (void)pc; \
+/* Expects names f and l in the enclosing function; returns early (void) when annotations are disabled. */
+
+static const int kMaxDescLen = 128;  // Capacity of ExpectRace::desc, terminating NUL included.
+
+struct ExpectRace {  // Node of a circular doubly-linked list whose sentinel is set up by InitList().
+  ExpectRace *next;
+  ExpectRace *prev;
+  int hitcount;  // Incremented by CheckContains() on every match.
+  uptr addr;  // Start of the annotated range.
+  uptr size;  // FindRace() treats the range as [addr, addr + size).
+  char *file;  // Stored as passed in; the string itself is not copied.
+  int line;
+  char desc[kMaxDescLen];  // Truncated, NUL-terminated copy made by AddExpectRace().
+};
+
+struct DynamicAnnContext {  // State shared by the dynamic-annotation entry points.
+  Mutex mtx;  // Taken (via Lock) by the functions in this file that touch the two lists.
+  ExpectRace expect;  // Sentinel of the expected-race list.
+  ExpectRace benign;  // Sentinel of the benign-race list.
+
+  DynamicAnnContext()
+    : mtx(MutexTypeAnnotations, StatMtxAnnotations) {
+  }
+};
+
+static DynamicAnnContext *dyn_ann_ctx;  // Set by InitializeDynamicAnnotations().
+static char dyn_ann_ctx_placeholder[sizeof(DynamicAnnContext)] ALIGN(64);  // Static storage the context is placement-constructed into.
+
+static void AddExpectRace(ExpectRace *list,
+    char *f, int l, uptr addr, uptr size, char *desc) {
+  // An entry with exactly this address and size is never added twice.
+  for (ExpectRace *cur = list->next; cur != list; cur = cur->next)
+    if (cur->addr == addr && cur->size == size) return;
+  ExpectRace *const node =
+      (ExpectRace*)internal_alloc(MBlockExpectRace, sizeof(ExpectRace));
+  node->hitcount = 0;
+  node->addr = addr;
+  node->size = size;
+  node->file = f;
+  node->line = l;
+  // Bounded copy of desc: at most kMaxDescLen - 1 characters, always
+  // NUL-terminated, and an empty string when desc is null.
+  int n = 0;
+  if (desc)
+    for (; n < kMaxDescLen - 1 && desc[n]; n++)
+      node->desc[n] = desc[n];
+  node->desc[n] = 0;
+  // Splice the node in directly after the sentinel.
+  node->prev = list;
+  node->next = list->next;
+  node->next->prev = node;
+  list->next = node;
+}
+
+static ExpectRace *FindRace(ExpectRace *list, uptr addr, uptr size) {
+  // Returns the first entry whose [addr, addr + size) range overlaps the
+  // queried range, or 0 when none does.
+  const uptr end = addr + size;
+  for (ExpectRace *cur = list->next; cur != list; cur = cur->next)
+    if (max(cur->addr, addr) < min(cur->addr + cur->size, end))
+      return cur;
+  return 0;
+}
+
+static bool CheckContains(ExpectRace *list, uptr addr, uptr size) {
+  // A hit is logged via DPrintf and bumps the matching entry's hitcount.
+  ExpectRace *const hit = FindRace(list, addr, size);
+  if (!hit) return false;
+  DPrintf("Hit expected/benign race: %s addr=%lx:%d %s:%d\n",
+      hit->desc, hit->addr, (int)hit->size, hit->file, hit->line);
+  ++hit->hitcount;
+  return true;
+}
+
+static void InitList(ExpectRace *list) {  // Makes list an empty ring: the sentinel points at itself.
+  list->next = list;
+  list->prev = list;
+}
+
+void InitializeDynamicAnnotations() {
+  dyn_ann_ctx = new(dyn_ann_ctx_placeholder) DynamicAnnContext;  // Placement-new into the static buffer.
+  InitList(&dyn_ann_ctx->expect);
+  InitList(&dyn_ann_ctx->benign);
+}
+
+bool IsExpectedReport(uptr addr, uptr size) {
+  // Under the annotations mutex, consult the expected list first and the
+  // benign list second; the second lookup is skipped once the first hits,
+  // so at most one entry has its hit counter bumped.
+  Lock lock(&dyn_ann_ctx->mtx);
+  return CheckContains(&dyn_ann_ctx->expect, addr, size) ||
+         CheckContains(&dyn_ann_ctx->benign, addr, size);
+}
+
+}  // namespace __tsan
+
+using namespace __tsan;  // NOLINT
+
+extern "C" {
+void AnnotateHappensBefore(char *f, int l, uptr addr) {  // Release on addr.
+  SCOPED_ANNOTATION(AnnotateHappensBefore);
+  Release(cur_thread(), CALLERPC, addr);
+}
+
+void AnnotateHappensAfter(char *f, int l, uptr addr) {  // Acquire on addr.
+  SCOPED_ANNOTATION(AnnotateHappensAfter);
+  Acquire(cur_thread(), CALLERPC, addr);
+}
+
+void AnnotateCondVarSignal(char *f, int l, uptr cv) {  // This and the stubs below do nothing beyond what SCOPED_ANNOTATION itself does.
+  SCOPED_ANNOTATION(AnnotateCondVarSignal);
+}
+
+void AnnotateCondVarSignalAll(char *f, int l, uptr cv) {
+  SCOPED_ANNOTATION(AnnotateCondVarSignalAll);
+}
+
+void AnnotateMutexIsNotPHB(char *f, int l, uptr mu) {
+  SCOPED_ANNOTATION(AnnotateMutexIsNotPHB);
+}
+
+void AnnotateCondVarWait(char *f, int l, uptr cv, uptr lock) {
+  SCOPED_ANNOTATION(AnnotateCondVarWait);
+}
+
+void AnnotateRWLockCreate(char *f, int l, uptr lock) {
+  SCOPED_ANNOTATION(AnnotateRWLockCreate);
+}
+
+void AnnotateRWLockDestroy(char *f, int l, uptr lock) {
+  SCOPED_ANNOTATION(AnnotateRWLockDestroy);
+}
+
+void AnnotateRWLockAcquired(char *f, int l, uptr lock, uptr is_w) {
+  SCOPED_ANNOTATION(AnnotateRWLockAcquired);
+}
+
+void AnnotateRWLockReleased(char *f, int l, uptr lock, uptr is_w) {
+  SCOPED_ANNOTATION(AnnotateRWLockReleased);
+}
+
+void AnnotateTraceMemory(char *f, int l, uptr mem) {
+  SCOPED_ANNOTATION(AnnotateTraceMemory);
+}
+
+void AnnotateFlushState(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateFlushState);
+}
+
+void AnnotateNewMemory(char *f, int l, uptr mem, uptr size) {
+  SCOPED_ANNOTATION(AnnotateNewMemory);
+}
+
+void AnnotateNoOp(char *f, int l, uptr mem) {
+  SCOPED_ANNOTATION(AnnotateNoOp);
+}
+
+static void ReportMissedExpectedRace(ExpectRace *race) {  // Prints a warning block for an expected race that was never hit.
+  Printf("==================\n");
+  Printf("WARNING: ThreadSanitizer: missed expected data race\n");
+  Printf("  %s addr=%lx %s:%d\n",
+      race->desc, race->addr, race->file, race->line);
+  Printf("==================\n");
+}
+
+void AnnotateFlushExpectedRaces(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateFlushExpectedRaces);
+  Lock lock(&dyn_ann_ctx->mtx);
+  ExpectRace *const head = &dyn_ann_ctx->expect;  // Ring sentinel.
+  for (ExpectRace *cur = head->next; cur != head; cur = head->next) {
+    if (cur->hitcount == 0) {  // Never hit: count it and report it.
+      CTX()->nmissed_expected++;
+      ReportMissedExpectedRace(cur);
+    }
+    cur->prev->next = cur->next;
+    cur->next->prev = cur->prev;
+    internal_free(cur);
+  }
+}
+
+void AnnotateEnableRaceDetection(char *f, int l, int enable) {  // The enable argument is currently unused.
+  SCOPED_ANNOTATION(AnnotateEnableRaceDetection);
+  // FIXME: Reconsider this functionality later. It may be irrelevant.
+}
+
+void AnnotateMutexIsUsedAsCondVar(char *f, int l, uptr mu) {  // Stub: only the SCOPED_ANNOTATION bookkeeping runs.
+  SCOPED_ANNOTATION(AnnotateMutexIsUsedAsCondVar);
+}
+
+void AnnotatePCQGet(char *f, int l, uptr pcq) {  // The four PCQ annotations are stubs as well.
+  SCOPED_ANNOTATION(AnnotatePCQGet);
+}
+
+void AnnotatePCQPut(char *f, int l, uptr pcq) {
+  SCOPED_ANNOTATION(AnnotatePCQPut);
+}
+
+void AnnotatePCQDestroy(char *f, int l, uptr pcq) {
+  SCOPED_ANNOTATION(AnnotatePCQDestroy);
+}
+
+void AnnotatePCQCreate(char *f, int l, uptr pcq) {
+  SCOPED_ANNOTATION(AnnotatePCQCreate);
+}
+
+void AnnotateExpectRace(char *f, int l, uptr mem, char *desc) {  // Registers a 1-byte expected race at mem.
+  SCOPED_ANNOTATION(AnnotateExpectRace);
+  Lock lock(&dyn_ann_ctx->mtx);
+  AddExpectRace(&dyn_ann_ctx->expect,
+                f, l, mem, 1, desc);
+  DPrintf("Add expected race: %s addr=%lx %s:%d\n", desc, mem, f, l);
+}
+
+static void BenignRaceImpl(char *f, int l, uptr mem, uptr size, char *desc) {  // Shared body of the two benign-race annotations.
+  Lock lock(&dyn_ann_ctx->mtx);
+  AddExpectRace(&dyn_ann_ctx->benign,
+                f, l, mem, size, desc);
+  DPrintf("Add benign race: %s addr=%lx %s:%d\n", desc, mem, f, l);
+}
+
+// FIXME: Turn it off later. What exactly is a benign race? Go talk to Hans Boehm.
+void AnnotateBenignRaceSized(char *f, int l, uptr mem, uptr size, char *desc) {
+  SCOPED_ANNOTATION(AnnotateBenignRaceSized);
+  BenignRaceImpl(f, l, mem, size, desc);
+}
+
+void AnnotateBenignRace(char *f, int l, uptr mem, char *desc) {  // Same as the sized variant with size fixed to 1.
+  SCOPED_ANNOTATION(AnnotateBenignRace);
+  BenignRaceImpl(f, l, mem, 1, desc);
+}
+
+void AnnotateIgnoreReadsBegin(char *f, int l) {  // IgnoreCtl args here: (thread, false = reads / true = writes, true = begin / false = end).
+  SCOPED_ANNOTATION(AnnotateIgnoreReadsBegin);
+  IgnoreCtl(cur_thread(), false, true);
+}
+
+void AnnotateIgnoreReadsEnd(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateIgnoreReadsEnd);
+  IgnoreCtl(cur_thread(), false, false);
+}
+
+void AnnotateIgnoreWritesBegin(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateIgnoreWritesBegin);
+  IgnoreCtl(cur_thread(), true, true);
+}
+
+void AnnotateIgnoreWritesEnd(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateIgnoreWritesEnd);
+  IgnoreCtl(cur_thread(), true, false);
+}
+
+void AnnotatePublishMemoryRange(char *f, int l, uptr addr, uptr size) {  // Stub: only the SCOPED_ANNOTATION bookkeeping runs (same for the stubs below).
+  SCOPED_ANNOTATION(AnnotatePublishMemoryRange);
+}
+
+void AnnotateUnpublishMemoryRange(char *f, int l, uptr addr, uptr size) {
+  SCOPED_ANNOTATION(AnnotateUnpublishMemoryRange);
+}
+
+void AnnotateThreadName(char *f, int l, char *name) {
+  SCOPED_ANNOTATION(AnnotateThreadName);
+}
+
+void WTFAnnotateHappensBefore(char *f, int l, uptr addr) {  // Counted under the AnnotateHappensBefore stat; no Release is performed.
+  SCOPED_ANNOTATION(AnnotateHappensBefore);
+}
+
+void WTFAnnotateHappensAfter(char *f, int l, uptr addr) {  // Counted under the AnnotateHappensAfter stat; no Acquire is performed.
+  SCOPED_ANNOTATION(AnnotateHappensAfter);
+}
+
+void WTFAnnotateBenignRaceSized(char *f, int l, uptr mem, uptr sz, char *desc) {  // Counted only; nothing is added to the benign list.
+  SCOPED_ANNOTATION(AnnotateBenignRaceSized);
+}
+
+int RunningOnValgrind() {  // Always reports 0.
+  return 0;
+}
+
+const char *ThreadSanitizerQuery(const char *query) {
+  // Only the "pure_happens_before" query is answered with "1"; any other
+  // query string gets "0".
+  const bool is_phb = internal_strcmp(query, "pure_happens_before") == 0;
+  return is_phb ? "1" : "0";
+}
+}  // extern "C"
diff --git a/lib/tsan/rtl/tsan_interface_ann.h b/lib/tsan/rtl/tsan_interface_ann.h
new file mode 100644
index 0000000..09e807a
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interface_ann.h
@@ -0,0 +1,31 @@
+//===-- tsan_interface_ann.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Interface for dynamic annotations.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_ANN_H
+#define TSAN_INTERFACE_ANN_H
+
+// This header should NOT include any other headers.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void __tsan_acquire(void *addr);  // Performs an Acquire on addr for the calling thread.
+void __tsan_release(void *addr);  // Performs a Release on addr for the calling thread.
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TSAN_INTERFACE_ANN_H
diff --git a/lib/tsan/rtl/tsan_interface_atomic.cc b/lib/tsan/rtl/tsan_interface_atomic.cc
new file mode 100644
index 0000000..7e5f191
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interface_atomic.cc
@@ -0,0 +1,194 @@
+//===-- tsan_interface_atomic.cc --------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_interface_atomic.h"
+#include "tsan_placement_new.h"
+#include "tsan_flags.h"
+#include "tsan_rtl.h"
+
+using namespace __tsan;  // NOLINT
+
+class ScopedAtomic {  // Scope guard for atomic entry points: holds a ScopedInRtl and checks the in_rtl depth.
+ public:
+  ScopedAtomic(ThreadState *thr, uptr pc, const char *func)
+      : thr_(thr) {
+    CHECK_EQ(thr_->in_rtl, 1);  // 1 due to our own ScopedInRtl member.
+    DPrintf("#%d: %s\n", thr_->tid, func);
+  }
+  ~ScopedAtomic() {
+    CHECK_EQ(thr_->in_rtl, 1);  // Still 1 here: members are destroyed after this body runs.
+  }
+ private:
+  ThreadState *thr_;
+  ScopedInRtl in_rtl_;
+};
+
+// Some shortcuts.
+typedef __tsan_memory_order morder;
+typedef __tsan_atomic8 a8;
+typedef __tsan_atomic16 a16;
+typedef __tsan_atomic32 a32;
+typedef __tsan_atomic64 a64;
+const int mo_relaxed = __tsan_memory_order_relaxed;
+const int mo_consume = __tsan_memory_order_consume;
+const int mo_acquire = __tsan_memory_order_acquire;
+const int mo_release = __tsan_memory_order_release;
+const int mo_acq_rel = __tsan_memory_order_acq_rel;
+const int mo_seq_cst = __tsan_memory_order_seq_cst;
+
+static void AtomicStatInc(ThreadState *thr, uptr size, morder mo, StatType t) {  // Bumps the total, per-operation, per-size and per-order counters.
+  StatInc(thr, StatAtomic);
+  StatInc(thr, t);
+  StatInc(thr, size == 1 ? StatAtomic1
+             : size == 2 ? StatAtomic2
+             : size == 4 ? StatAtomic4
+             :             StatAtomic8);  // Any size other than 1/2/4 is counted as 8.
+  StatInc(thr, mo == mo_relaxed ? StatAtomicRelaxed
+             : mo == mo_consume ? StatAtomicConsume
+             : mo == mo_acquire ? StatAtomicAcquire
+             : mo == mo_release ? StatAtomicRelease
+             : mo == mo_acq_rel ? StatAtomicAcq_Rel
+             :                    StatAtomicSeq_Cst);  // Any other order value is counted as seq_cst.
+}
+
+#define SCOPED_ATOMIC(func, ...) \
+    mo = flags()->force_seq_cst_atomics ? (morder)mo_seq_cst : mo; \
+    ThreadState *const thr = cur_thread(); \
+    const uptr pc = (uptr)__builtin_return_address(0); \
+    AtomicStatInc(thr, sizeof(*a), mo, StatAtomic##func); \
+    ScopedAtomic sa(thr, pc, __FUNCTION__); \
+    return Atomic##func(thr, pc, __VA_ARGS__); \
+/* Expects names a and mo in the enclosing function, and returns from that function. */
+
+template<typename T>
+static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a,
+    morder mo) {
+  CHECK(mo & (mo_relaxed | mo_consume | mo_acquire | mo_seq_cst));
+  const T loaded = *a;  // The plain load comes first, then the Acquire.
+  const bool acquires = (mo & (mo_consume | mo_acquire | mo_seq_cst)) != 0;
+  if (acquires) Acquire(thr, pc, (uptr)a);
+  return loaded;
+}
+
+template<typename T>
+static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  CHECK(mo & (mo_relaxed | mo_release | mo_seq_cst));
+  const bool releases = (mo & (mo_release | mo_seq_cst)) != 0;
+  if (releases) Release(thr, pc, (uptr)a);  // Release precedes the plain store.
+  *a = v;
+}
+
+template<typename T>
+static T AtomicExchange(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  const int kRel = mo_release | mo_acq_rel | mo_seq_cst;
+  const int kAcq = mo_consume | mo_acquire | mo_acq_rel | mo_seq_cst;
+  if (mo & kRel) Release(thr, pc, (uptr)a);
+  const T prev = __sync_lock_test_and_set(a, v);  // Previous value of *a.
+  if (mo & kAcq) Acquire(thr, pc, (uptr)a);
+  return prev;
+}
+
+template<typename T>
+static T AtomicFetchAdd(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  const int kRel = mo_release | mo_acq_rel | mo_seq_cst;
+  const int kAcq = mo_consume | mo_acquire | mo_acq_rel | mo_seq_cst;
+  if (mo & kRel) Release(thr, pc, (uptr)a);
+  const T prev = __sync_fetch_and_add(a, v);  // Value of *a before the add.
+  if (mo & kAcq) Acquire(thr, pc, (uptr)a);
+  return prev;
+}
+
+template<typename T>
+static bool AtomicCAS(ThreadState *thr, uptr pc,
+    volatile T *a, T *c, T v, morder mo) {
+  const int kRel = mo_release | mo_acq_rel | mo_seq_cst;
+  const int kAcq = mo_consume | mo_acquire | mo_acq_rel | mo_seq_cst;
+  if (mo & kRel) Release(thr, pc, (uptr)a);
+  const T expected = *c;
+  const T observed = __sync_val_compare_and_swap(a, expected, v);
+  if (mo & kAcq) Acquire(thr, pc, (uptr)a);
+  // On failure the value actually seen is handed back through *c.
+  const bool swapped = (observed == expected);
+  if (!swapped) *c = observed;
+  return swapped;
+}
+
+static void AtomicFence(ThreadState *thr, uptr pc, morder mo) {  // thr, pc and mo are unused: always a full barrier.
+  __sync_synchronize();
+}
+
+a8 __tsan_atomic8_load(const volatile a8 *a, morder mo) {  // Each entry point below expands SCOPED_ATOMIC, which returns Atomic<Op>(thr, pc, ...).
+  SCOPED_ATOMIC(Load, a, mo);
+}
+
+a16 __tsan_atomic16_load(const volatile a16 *a, morder mo) {
+  SCOPED_ATOMIC(Load, a, mo);
+}
+
+a32 __tsan_atomic32_load(const volatile a32 *a, morder mo) {
+  SCOPED_ATOMIC(Load, a, mo);
+}
+
+a64 __tsan_atomic64_load(const volatile a64 *a, morder mo) {
+  SCOPED_ATOMIC(Load, a, mo);
+}
+
+void __tsan_atomic8_store(volatile a8 *a, a8 v, morder mo) {
+  SCOPED_ATOMIC(Store, a, v, mo);
+}
+
+void __tsan_atomic16_store(volatile a16 *a, a16 v, morder mo) {
+  SCOPED_ATOMIC(Store, a, v, mo);
+}
+
+void __tsan_atomic32_store(volatile a32 *a, a32 v, morder mo) {
+  SCOPED_ATOMIC(Store, a, v, mo);
+}
+
+void __tsan_atomic64_store(volatile a64 *a, a64 v, morder mo) {
+  SCOPED_ATOMIC(Store, a, v, mo);
+}
+
+a32 __tsan_atomic32_exchange(volatile a32 *a, a32 v, morder mo) {
+  SCOPED_ATOMIC(Exchange, a, v, mo);
+}
+
+a64 __tsan_atomic64_exchange(volatile a64 *a, a64 v, morder mo) {
+  SCOPED_ATOMIC(Exchange, a, v, mo);
+}
+
+a32 __tsan_atomic32_fetch_add(volatile a32 *a, a32 v, morder mo) {
+  SCOPED_ATOMIC(FetchAdd, a, v, mo);
+}
+
+a64 __tsan_atomic64_fetch_add(volatile a64 *a, a64 v, morder mo) {
+  SCOPED_ATOMIC(FetchAdd, a, v, mo);
+}
+
+int __tsan_atomic32_compare_exchange_strong(volatile a32 *a, a32 *c, a32 v,
+    morder mo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo);  // Returns AtomicCAS's bool as int; on failure *c receives the observed value.
+}
+
+int __tsan_atomic64_compare_exchange_strong(volatile a64 *a, a64 *c, a64 v,
+    morder mo) {
+  SCOPED_ATOMIC(CAS, a, c, v, mo);
+}
+
+void __tsan_atomic_thread_fence(morder mo) {
+  char* a;  // Never dereferenced at run time: exists only so the macro's sizeof(*a) compiles.
+  SCOPED_ATOMIC(Fence, mo);
+}
diff --git a/lib/tsan/rtl/tsan_interface_atomic.h b/lib/tsan/rtl/tsan_interface_atomic.h
new file mode 100644
index 0000000..7244079
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interface_atomic.h
@@ -0,0 +1,73 @@
+//===-- tsan_interface_atomic.h ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_ATOMIC_H
+#define TSAN_INTERFACE_ATOMIC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef char  __tsan_atomic8;
+typedef short __tsan_atomic16;  // NOLINT
+typedef int   __tsan_atomic32;
+typedef long  __tsan_atomic64;  // NOLINT
+
+typedef enum {  // Each order is a distinct single bit, so a value can be tested against a mask of several orders.
+  __tsan_memory_order_relaxed = 1 << 0,
+  __tsan_memory_order_consume = 1 << 1,
+  __tsan_memory_order_acquire = 1 << 2,
+  __tsan_memory_order_release = 1 << 3,
+  __tsan_memory_order_acq_rel = 1 << 4,
+  __tsan_memory_order_seq_cst = 1 << 5,
+} __tsan_memory_order;
+
+__tsan_atomic8 __tsan_atomic8_load(const volatile __tsan_atomic8 *a,
+    __tsan_memory_order mo);
+__tsan_atomic16 __tsan_atomic16_load(const volatile __tsan_atomic16 *a,
+    __tsan_memory_order mo);
+__tsan_atomic32 __tsan_atomic32_load(const volatile __tsan_atomic32 *a,
+    __tsan_memory_order mo);
+__tsan_atomic64 __tsan_atomic64_load(const volatile __tsan_atomic64 *a,
+    __tsan_memory_order mo);
+
+void __tsan_atomic8_store(volatile __tsan_atomic8 *a, __tsan_atomic8 v,
+    __tsan_memory_order mo);
+void __tsan_atomic16_store(volatile __tsan_atomic16 *a, __tsan_atomic16 v,
+    __tsan_memory_order mo);
+void __tsan_atomic32_store(volatile __tsan_atomic32 *a, __tsan_atomic32 v,
+    __tsan_memory_order mo);
+void __tsan_atomic64_store(volatile __tsan_atomic64 *a, __tsan_atomic64 v,
+    __tsan_memory_order mo);
+
+__tsan_atomic32 __tsan_atomic32_exchange(volatile __tsan_atomic32 *a,
+    __tsan_atomic32 v, __tsan_memory_order mo);
+__tsan_atomic64 __tsan_atomic64_exchange(volatile __tsan_atomic64 *a,
+    __tsan_atomic64 v, __tsan_memory_order mo);
+
+__tsan_atomic32 __tsan_atomic32_fetch_add(volatile __tsan_atomic32 *a,
+    __tsan_atomic32 v, __tsan_memory_order mo);
+__tsan_atomic64 __tsan_atomic64_fetch_add(volatile __tsan_atomic64 *a,
+    __tsan_atomic64 v, __tsan_memory_order mo);
+
+int __tsan_atomic32_compare_exchange_strong(volatile __tsan_atomic32 *a,
+    __tsan_atomic32 *c, __tsan_atomic32 v, __tsan_memory_order mo);
+int __tsan_atomic64_compare_exchange_strong(volatile __tsan_atomic64 *a,
+    __tsan_atomic64 *c, __tsan_atomic64 v, __tsan_memory_order mo);
+
+void __tsan_atomic_thread_fence(__tsan_memory_order mo);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // #ifndef TSAN_INTERFACE_ATOMIC_H
diff --git a/lib/tsan/rtl/tsan_interface_inl.h b/lib/tsan/rtl/tsan_interface_inl.h
new file mode 100644
index 0000000..233f902
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interface_inl.h
@@ -0,0 +1,65 @@
+//===-- tsan_interface_inl.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_interface.h"
+#include "tsan_rtl.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))  // PC in hook's caller.
+
+using namespace __tsan;  // NOLINT
+
+void __tsan_read1(void *addr) {  // 1-byte read; presumably log2 size 0.
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 0, 0);
+}
+
+void __tsan_read2(void *addr) {  // 2-byte read; presumably log2 size 1.
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, 0);
+}
+
+void __tsan_read4(void *addr) {  // 4-byte read; presumably log2 size 2.
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, 0);
+}
+
+void __tsan_read8(void *addr) {  // 8-byte read; presumably log2 size 3.
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 3, 0);
+}
+
+void __tsan_write1(void *addr) {  // 1-byte write; presumably log2 size 0.
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 0, 1);
+}
+
+void __tsan_write2(void *addr) {  // 2-byte write; presumably log2 size 1.
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, 1);
+}
+
+void __tsan_write4(void *addr) {  // 4-byte write; presumably log2 size 2.
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, 1);
+}
+
+void __tsan_write8(void *addr) {  // 8-byte write; presumably log2 size 3.
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 3, 1);
+}
+
+void __tsan_vptr_update(void **vptr_p, void *new_val) {
+  CHECK_EQ(sizeof(vptr_p), 8);  // Pointer slots must be 8 bytes wide.
+  if (*vptr_p != new_val)  // Only a value-changing store is recorded.
+    MemoryAccess(cur_thread(), CALLERPC, (uptr)vptr_p, 3, 1);  // As write8.
+}
+
+void __tsan_func_entry(void *pc) {  // Passes pc to FuncEntry.
+  FuncEntry(cur_thread(), (uptr)pc);
+}
+
+void __tsan_func_exit() {  // Calls FuncExit for the current thread.
+  FuncExit(cur_thread());
+}
diff --git a/lib/tsan/rtl/tsan_md5.cc b/lib/tsan/rtl/tsan_md5.cc
new file mode 100644
index 0000000..4873360
--- /dev/null
+++ b/lib/tsan/rtl/tsan_md5.cc
@@ -0,0 +1,245 @@
+//===-- tsan_md5.cc ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+#define F(x, y, z)      ((z) ^ ((x) & ((y) ^ (z))))  // Bitwise x ? y : z.
+#define G(x, y, z)      ((y) ^ ((z) & ((x) ^ (y))))  // Bitwise z ? x : y.
+#define H(x, y, z)      ((x) ^ (y) ^ (z))  // Bitwise parity of x, y, z.
+#define I(x, y, z)      ((y) ^ ((x) | ~(z)))  // Used by the last 16 steps.
+
+#define STEP(f, a, b, c, d, x, t, s) \
+  (a) += f((b), (c), (d)) + (x) + (t); \
+  (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
+  (a) += (b);
+
+#define SET(n) \
+  (*(MD5_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+  SET(n)
+
+typedef unsigned int MD5_u32plus;
+typedef unsigned long ulong_t;  // NOLINT
+
+typedef struct {
+  MD5_u32plus lo, hi;  // Byte count: low 29 bits in lo, the rest in hi.
+  MD5_u32plus a, b, c, d;  // Running digest state.
+  unsigned char buffer[64];  // Holds a partial block until 64 bytes arrive.
+  MD5_u32plus block[16];  // Not referenced by the code in this file.
+} MD5_CTX;
+
+static void *body(MD5_CTX *ctx, void *data, ulong_t size) {
+  unsigned char *ptr;
+  MD5_u32plus a, b, c, d;
+  MD5_u32plus saved_a, saved_b, saved_c, saved_d;
+  // Folds size bytes of data, 64 at a time, into the a/b/c/d state of ctx.
+  ptr = (unsigned char*)data;
+
+  a = ctx->a;
+  b = ctx->b;
+  c = ctx->c;
+  d = ctx->d;
+
+  do {  // One pass per 64-byte input block.
+    saved_a = a;  // Kept so the pre-block state can be added back below.
+    saved_b = b;
+    saved_c = c;
+    saved_d = d;
+
+    STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
+    STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
+    STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
+    STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
+    STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
+    STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
+    STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
+    STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
+    STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
+    STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
+    STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
+    STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
+    STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
+    STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
+    STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
+    STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
+
+    STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
+    STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
+    STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
+    STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
+    STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
+    STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
+    STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
+    STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
+    STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
+    STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
+    STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
+    STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
+    STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
+    STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
+    STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
+    STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
+
+    STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
+    STEP(H, d, a, b, c, GET(8), 0x8771f681, 11)
+    STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
+    STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)
+    STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
+    STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)
+    STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
+    STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)
+    STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
+    STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)
+    STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
+    STEP(H, b, c, d, a, GET(6), 0x04881d05, 23)
+    STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
+    STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)
+    STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
+    STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23)
+
+    STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
+    STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
+    STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
+    STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
+    STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
+    STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
+    STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
+    STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
+    STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
+    STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
+    STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
+    STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
+    STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
+    STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
+    STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
+    STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
+
+    a += saved_a;
+    b += saved_b;
+    c += saved_c;
+    d += saved_d;
+
+    ptr += 64;
+  } while (size -= 64);  // Caller must pass a non-zero multiple of 64.
+
+  ctx->a = a;
+  ctx->b = b;
+  ctx->c = c;
+  ctx->d = d;
+
+  return ptr;  // Points just past the last block consumed.
+}
+
+void MD5_Init(MD5_CTX *ctx) {  // Resets ctx to the start-of-message state.
+  ctx->a = 0x67452301;  // Standard MD5 initial chaining values (RFC 1321).
+  ctx->b = 0xefcdab89;
+  ctx->c = 0x98badcfe;
+  ctx->d = 0x10325476;
+
+  ctx->lo = 0;  // No bytes hashed yet.
+  ctx->hi = 0;
+}
+
+void MD5_Update(MD5_CTX *ctx, void *data, ulong_t size) {
+  MD5_u32plus saved_lo;
+  ulong_t used, free;
+  // Feeds size bytes of data into ctx; may be called repeatedly.
+  saved_lo = ctx->lo;
+  if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
+    ctx->hi++;  // Carry out of the 29-bit low counter.
+  ctx->hi += size >> 29;
+
+  used = saved_lo & 0x3f;  // Bytes already waiting in ctx->buffer.
+
+  if (used) {
+    free = 64 - used;
+
+    if (size < free) {  // Still not a full block: just stash the input.
+      internal_memcpy(&ctx->buffer[used], data, size);
+      return;
+    }
+
+    internal_memcpy(&ctx->buffer[used], data, free);  // Top up the block.
+    data = (unsigned char *)data + free;
+    size -= free;
+    body(ctx, ctx->buffer, 64);
+  }
+
+  if (size >= 64) {  // Hash whole blocks straight from the caller's data.
+    data = body(ctx, data, size & ~(ulong_t)0x3f);
+    size &= 0x3f;
+  }
+
+  internal_memcpy(ctx->buffer, data, size);  // Keep the tail for next time.
+}
+
+void MD5_Final(unsigned char *result, MD5_CTX *ctx) {
+  ulong_t used, free;
+  // Pads and hashes the tail, then writes the 16-byte digest to result.
+  used = ctx->lo & 0x3f;
+
+  ctx->buffer[used++] = 0x80;  // Padding starts with a single 0x80 byte.
+
+  free = 64 - used;
+
+  if (free < 8) {  // No room for the 8-byte length: pad out, hash a block.
+    internal_memset(&ctx->buffer[used], 0, free);
+    body(ctx, ctx->buffer, 64);
+    used = 0;
+    free = 64;
+  }
+
+  internal_memset(&ctx->buffer[used], 0, free - 8);
+
+  ctx->lo <<= 3;  // Byte count -> bit count.
+  ctx->buffer[56] = ctx->lo;  // Length fills the last 8 bytes, LSB first.
+  ctx->buffer[57] = ctx->lo >> 8;
+  ctx->buffer[58] = ctx->lo >> 16;
+  ctx->buffer[59] = ctx->lo >> 24;
+  ctx->buffer[60] = ctx->hi;
+  ctx->buffer[61] = ctx->hi >> 8;
+  ctx->buffer[62] = ctx->hi >> 16;
+  ctx->buffer[63] = ctx->hi >> 24;
+
+  body(ctx, ctx->buffer, 64);
+
+  result[0] = ctx->a;  // Digest is a, b, c, d, each stored LSB first.
+  result[1] = ctx->a >> 8;
+  result[2] = ctx->a >> 16;
+  result[3] = ctx->a >> 24;
+  result[4] = ctx->b;
+  result[5] = ctx->b >> 8;
+  result[6] = ctx->b >> 16;
+  result[7] = ctx->b >> 24;
+  result[8] = ctx->c;
+  result[9] = ctx->c >> 8;
+  result[10] = ctx->c >> 16;
+  result[11] = ctx->c >> 24;
+  result[12] = ctx->d;
+  result[13] = ctx->d >> 8;
+  result[14] = ctx->d >> 16;
+  result[15] = ctx->d >> 24;
+
+  internal_memset(ctx, 0, sizeof(*ctx));  // Wipe the context after use.
+}
+
+MD5Hash md5_hash(const void *data, uptr size) {  // One-shot MD5 of a buffer.
+  MD5Hash res;
+  MD5_CTX ctx;
+  MD5_Init(&ctx);
+  MD5_Update(&ctx, (void*)data, size);  // Cast drops const; data is read only.
+  MD5_Final((unsigned char*)&res.hash[0], &ctx);  // Digest lands in res.hash.
+  return res;
+}
+}
diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
new file mode 100644
index 0000000..e24074e
--- /dev/null
+++ b/lib/tsan/rtl/tsan_mman.cc
@@ -0,0 +1,137 @@
+//===-- tsan_mman.cc --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_mman.h"
+#include "tsan_allocator.h"
+#include "tsan_rtl.h"
+#include "tsan_report.h"
+#include "tsan_flags.h"
+
+namespace __tsan {
+
+static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
+  if (!thr->in_signal_handler || !flags()->report_signal_unsafe)
+    return;  // Report only inside a signal handler, and only if enabled.
+  StackTrace stack;
+  stack.ObtainCurrent(thr, pc);
+  ScopedReport rep(ReportTypeSignalUnsafe);
+  rep.AddStack(&stack);
+  OutputReport(rep);
+}
+
+void *user_alloc(ThreadState *thr, uptr pc, uptr sz) {
+  CHECK_GT(thr->in_rtl, 0);
+  MBlock *b = (MBlock*)Alloc(sz + sizeof(MBlock));  // Header + user bytes.
+  b->size = sz;
+  void *p = b + 1;  // User memory starts right after the MBlock header.
+  if (CTX() && CTX()->initialized) {
+    MemoryResetRange(thr, pc, (uptr)p, sz);
+  }
+  DPrintf("#%d: alloc(%lu) = %p\n", thr->tid, sz, p);
+  SignalUnsafeCall(thr, pc);  // Reports if called from a signal handler.
+  return p;
+}
+
+void user_free(ThreadState *thr, uptr pc, void *p) {
+  CHECK_GT(thr->in_rtl, 0);
+  CHECK_NE(p, (void*)0);  // NULL is rejected; callers must filter it out.
+  DPrintf("#%d: free(%p)\n", thr->tid, p);
+  MBlock *b = user_mblock(thr, p);
+  p = b + 1;  // Rewind to the user-area start; p may point inside the block.
+  if (CTX() && CTX()->initialized && thr->in_rtl == 1) {
+    MemoryRangeFreed(thr, pc, (uptr)p, b->size);
+  }
+  Free(b);  // The header is the real start of the allocation.
+  SignalUnsafeCall(thr, pc);
+}
+
+void *user_realloc(ThreadState *thr, uptr pc, void *p, uptr sz) {
+  CHECK_GT(thr->in_rtl, 0);
+  void *p2 = 0;  // Stays null when sz == 0.
+  // FIXME: Handle "shrinking" more efficiently,
+  // it seems that some software actually does this.
+  if (sz) {
+    p2 = user_alloc(thr, pc, sz);  // Always a fresh block, never in place.
+    if (p) {
+      MBlock *b = user_mblock(thr, p);
+      internal_memcpy(p2, p, min(b->size, sz));  // Copy what fits.
+    }
+  }
+  if (p) {
+    user_free(thr, pc, p);  // The old block goes away even when sz == 0.
+  }
+  return p2;
+}
+
+void *user_alloc_aligned(ThreadState *thr, uptr pc, uptr sz, uptr align) {
+  CHECK_GT(thr->in_rtl, 0);
+  void *p = user_alloc(thr, pc, sz + align);  // Over-allocate by align.
+  void *pa = RoundUp(p, align);  // May point into the middle of the block.
+  DCHECK_LE((uptr)pa + sz, (uptr)p + sz + align);
+  return pa;
+}
+
+MBlock *user_mblock(ThreadState *thr, void *p) {
+  CHECK_GT(thr->in_rtl, 0);
+  CHECK_NE(p, (void*)0);
+  MBlock *b = (MBlock*)AllocBlock(p);  // Presumably the block holding p.
+  // FIXME: Output a warning, it's a user error.
+  if (p < (char*)(b + 1) || p > (char*)(b + 1) + b->size) {
+    Printf("user_mblock p=%p b=%p size=%lu beg=%p end=%p\n",
+        p, b, b->size, (char*)(b + 1), (char*)(b + 1) + b->size);
+    CHECK_GE(p, (char*)(b + 1));  // One of these two checks fails here.
+    CHECK_LE(p, (char*)(b + 1) + b->size);
+  }
+  return b;
+}
+
+#if TSAN_DEBUG
+struct InternalMBlock {  // Debug-only header placed before internal blocks.
+  static u32 const kMagic = 0xBCEBC041;
+  u32 magic;  // Set to kMagic on alloc and verified on free.
+  u32 typ;  // MBlockType passed to internal_alloc.
+  u64 sz;  // Requested size, header excluded.
+};
+#endif
+
+void *internal_alloc(MBlockType typ, uptr sz) {
+  ThreadState *thr = cur_thread();
+  CHECK_GT(thr->in_rtl, 0);
+#if TSAN_DEBUG
+  InternalMBlock *b = (InternalMBlock*)Alloc(sizeof(InternalMBlock) + sz);
+  b->magic = InternalMBlock::kMagic;  // Checked again in internal_free.
+  b->typ = typ;
+  b->sz = sz;
+  thr->int_alloc_cnt[typ] += 1;  // Per-thread, per-type accounting.
+  thr->int_alloc_siz[typ] += sz;
+  void *p = b + 1;  // Caller's memory follows the debug header.
+  return p;
+#else
+  return Alloc(sz);  // Non-debug build: no header, no accounting.
+#endif
+}
+
+void internal_free(void *p) {
+  ThreadState *thr = cur_thread();
+  CHECK_GT(thr->in_rtl, 0);
+#if TSAN_DEBUG
+  InternalMBlock *b = (InternalMBlock*)p - 1;  // Step back to the header.
+  CHECK_EQ(b->magic, InternalMBlock::kMagic);
+  thr->int_alloc_cnt[b->typ] -= 1;  // Charged to the freeing thread.
+  thr->int_alloc_siz[b->typ] -= b->sz;
+  Free(b);
+#else
+  Free(p);  // Non-debug build: p is the raw allocation.
+#endif
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_mman.h b/lib/tsan/rtl/tsan_mman.h
new file mode 100644
index 0000000..8b51de6
--- /dev/null
+++ b/lib/tsan/rtl/tsan_mman.h
@@ -0,0 +1,111 @@
+//===-- tsan_mman.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_MMAN_H
+#define TSAN_MMAN_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+// Descriptor of user's memory block.
+struct MBlock {
+  uptr size;  // User-visible size in bytes; this header is not counted.
+};
+
+// For user allocations.
+void *user_alloc(ThreadState *thr, uptr pc, uptr sz);
+// Does not accept NULL.
+void user_free(ThreadState *thr, uptr pc, void *p);
+void *user_realloc(ThreadState *thr, uptr pc, void *p, uptr sz);
+void *user_alloc_aligned(ThreadState *thr, uptr pc, uptr sz, uptr align);
+// Given the pointer p into a valid allocated block,
+// returns the descriptor of the block.
+MBlock *user_mblock(ThreadState *thr, void *p);
+
+enum MBlockType {
+  MBlockScopedBuf,
+  MBlockString,
+  MBlockStackTrace,
+  MBlockSync,
+  MBlockClock,
+  MBlockThreadContex,
+  MBlockRacyStacks,
+  MBlockRacyAddresses,
+  MBlockAtExit,
+  MBlockFlag,
+  MBlockReport,
+  MBlockReportMop,
+  MBlockReportThread,
+  MBlockReportMutex,
+  MBlockReportLoc,
+  MBlockReportStack,
+  MBlockSuppression,
+  MBlockExpectRace,
+
+  // This must be the last.
+  MBlockTypeCount,
+};
+
+// For internal data structures.
+void *internal_alloc(MBlockType typ, uptr sz);
+void internal_free(void *p);
+
+template<typename T>
+void DestroyAndFree(T *&p) {  // Runs ~T, frees via internal_free, nulls p.
+  p->~T();
+  internal_free(p);
+  p = 0;  // p is a reference, so the caller's pointer is cleared too.
+}
+
+template<typename T>
+class InternalScopedBuf {  // Scoped array of cnt T's from internal_alloc.
+ public:
+  explicit InternalScopedBuf(uptr cnt) {
+    cnt_ = cnt;  // Element count; the storage below is raw, no T ctor runs.
+    ptr_ = (T*)internal_alloc(MBlockScopedBuf, cnt * sizeof(T));
+  }
+
+  ~InternalScopedBuf() {
+    internal_free(ptr_);  // Releases storage; element dtors are not run.
+  }
+
+  operator T *() {  // Lets the buffer be passed where a T* is expected.
+    return ptr_;
+  }
+
+  T &operator[](uptr i) {  // Unchecked element access.
+    return ptr_[i];
+  }
+
+  T *Ptr() {
+    return ptr_;
+  }
+
+  uptr Count() {  // Size in elements.
+    return cnt_;
+  }
+
+  uptr Size() {  // Size in bytes.
+    return cnt_ * sizeof(T);
+  }
+
+ private:
+  T *ptr_;
+  uptr cnt_;
+
+  InternalScopedBuf(const InternalScopedBuf&);  // Declared only: no copying.
+  void operator = (const InternalScopedBuf&);
+};
+
+}  // namespace __tsan
+#endif  // TSAN_MMAN_H
diff --git a/lib/tsan/rtl/tsan_mutex.cc b/lib/tsan/rtl/tsan_mutex.cc
new file mode 100644
index 0000000..a343a8b
--- /dev/null
+++ b/lib/tsan/rtl/tsan_mutex.cc
@@ -0,0 +1,275 @@
+//===-- tsan_mutex.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_mutex.h"
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+// Simple reader-writer spin-mutex. Optimized for not-so-contended case.
+// Readers have preference and can possibly starve writers.
+
+// The table fixes what mutexes can be locked under what mutexes.
+// E.g. if the row for MutexTypeThreads contains MutexTypeReport,
+// then Report mutex can be locked while under Threads mutex.
+// The leaf mutexes can be locked under any other mutexes.
+// Recursive locking is not supported.
+const MutexType MutexTypeLeaf = (MutexType)-1;
+static MutexType CanLockTab[MutexTypeCount][MutexTypeCount] = {
+  /*0 MutexTypeInvalid*/     {},
+  /*1 MutexTypeTrace*/       {MutexTypeLeaf},
+  /*2 MutexTypeThreads*/     {MutexTypeReport},
+  /*3 MutexTypeReport*/      {},
+  /*4 MutexTypeSyncVar*/     {},
+  /*5 MutexTypeSyncTab*/     {MutexTypeSyncVar},
+  /*6 MutexTypeSlab*/        {MutexTypeLeaf},
+  /*7 MutexTypeAnnotations*/ {},
+  /*8 MutexTypeAtExit*/      {MutexTypeSyncTab},
+};
+
+static bool CanLockAdj[MutexTypeCount][MutexTypeCount];
+
+void InitializeMutex() {
+  // Build the "can lock" adjacency matrix.
+  // If [i][j]==true, then one can lock mutex j while under mutex i.
+  const int N = MutexTypeCount;
+  int cnt[N] = {};  // cnt[i]: explicit successors listed for mutex i.
+  bool leaf[N] = {};  // leaf[i]: row i carries the MutexTypeLeaf marker.
+  for (int i = 1; i < N; i++) {  // Row 0 is MutexTypeInvalid; skip it.
+    for (int j = 0; j < N; j++) {
+      int z = CanLockTab[i][j];
+      if (z == MutexTypeInvalid)
+        continue;  // Unfilled slot in the table row.
+      if (z == MutexTypeLeaf) {
+        CHECK(!leaf[i]);
+        leaf[i] = true;
+        continue;
+      }
+      CHECK(!CanLockAdj[i][z]);  // The same successor must not appear twice.
+      CanLockAdj[i][z] = true;
+      cnt[i]++;
+    }
+  }
+  for (int i = 0; i < N; i++) {
+    CHECK(!leaf[i] || cnt[i] == 0);  // A leaf row may not name successors.
+  }
+  // Add leaf mutexes.
+  for (int i = 0; i < N; i++) {
+    if (!leaf[i])
+      continue;
+    for (int j = 0; j < N; j++) {
+      if (i == j || leaf[j] || j == MutexTypeInvalid)
+        continue;
+      CHECK(!CanLockAdj[j][i]);
+      CanLockAdj[j][i] = true;  // Leaf i may be taken under non-leaf j.
+    }
+  }
+  // Build the transitive closure.
+  bool CanLockAdj2[MutexTypeCount][MutexTypeCount];  // Local scratch copy.
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      CanLockAdj2[i][j] = CanLockAdj[i][j];
+    }
+  }
+  for (int k = 0; k < N; k++) {
+    for (int i = 0; i < N; i++) {
+      for (int j = 0; j < N; j++) {
+        if (CanLockAdj2[i][k] && CanLockAdj2[k][j]) {
+          CanLockAdj2[i][j] = true;
+        }
+      }
+    }
+  }
+#if 0
+  Printf("Can lock graph:\n");
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      Printf("%d ", CanLockAdj[i][j]);
+    }
+    Printf("\n");
+  }
+  Printf("Can lock graph closure:\n");
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      Printf("%d ", CanLockAdj2[i][j]);
+    }
+    Printf("\n");
+  }
+#endif
+  // Verify that the graph is acyclic.
+  for (int i = 0; i < N; i++) {
+    if (CanLockAdj2[i][i]) {  // i reaches itself through the closure.
+      Printf("Mutex %d participates in a cycle\n", i);
+      Die();
+    }
+  }
+}
+
+DeadlockDetector::DeadlockDetector() {
+  // Rely on zero initialization because some mutexes can be locked before ctor.
+}
+
+void DeadlockDetector::Lock(MutexType t) {
+  // Printf("LOCK %d @%llu\n", t, seq_ + 1);
+  u64 max_seq = 0;
+  u64 max_idx = MutexTypeInvalid;
+  for (int i = 0; i != MutexTypeCount; i++) {  // Find the newest held mutex.
+    if (locked_[i] == 0)
+      continue;  // Type i is not held.
+    CHECK_NE(locked_[i], max_seq);
+    if (max_seq < locked_[i]) {
+      max_seq = locked_[i];
+      max_idx = i;
+    }
+  }
+  locked_[t] = ++seq_;  // Stamp t with the next sequence number.
+  if (max_idx == MutexTypeInvalid)
+    return;  // Nothing else is held, so there is nothing to check.
+  // Printf("  last %d @%llu\n", max_idx, max_seq);
+  if (!CanLockAdj[max_idx][t]) {  // t is not allowed under max_idx.
+    Printf("ThreadSanitizer: internal deadlock detected\n");
+    Printf("ThreadSanitizer: can't lock %d while under %llu\n", t, max_idx);
+    Die();
+  }
+}
+
+void DeadlockDetector::Unlock(MutexType t) {
+  // Printf("UNLO %d @%llu #%llu\n", t, seq_, locked_[t]);
+  CHECK(locked_[t]);  // t must currently be recorded as held.
+  locked_[t] = 0;  // Zero means "not held" in Lock().
+}
+
+const uptr kUnlocked = 0;  // state_ value with no writer and no readers.
+const uptr kWriteLock = 1;  // Bit 0 of state_: a writer holds the mutex.
+const uptr kReadLock = 2;  // Added to state_ once per reader.
+
+class Backoff {  // Spin-then-yield waiter used by the Mutex slow paths.
+ public:
+  Backoff()
+    : iter_() {
+  }
+
+  bool Do() {  // Waits once; always returns true so it can drive a for-loop.
+    if (iter_++ < kActiveSpinIters)
+      proc_yield(kActiveSpinCnt);  // The first kActiveSpinIters waits spin.
+    else
+      sched_yield();  // Every later wait gives up the CPU.
+    return true;
+  }
+
+  u64 Contention() const {  // Wait cost so far; a yield weighs 10 spins.
+    u64 passive = iter_ > kActiveSpinIters ? iter_ - kActiveSpinIters : 0;
+    u64 active = iter_ - passive;  // == min(iter_, kActiveSpinIters).
+    return active + 10 * passive;
+  }
+
+ private:
+  int iter_;  // Number of Do() calls made so far.
+  static const int kActiveSpinIters = 10;
+  static const int kActiveSpinCnt = 20;
+};
+
+Mutex::Mutex(MutexType type, StatType stat_type) {
+  CHECK_GT(type, MutexTypeInvalid);  // type must be a real mutex type.
+  CHECK_LT(type, MutexTypeCount);
+#if TSAN_DEBUG
+  type_ = type;  // Only stored in debug builds.
+#endif
+#if TSAN_COLLECT_STATS
+  stat_type_ = stat_type;  // Only stored when stats are collected.
+#endif
+  atomic_store(&state_, kUnlocked, memory_order_relaxed);
+}
+
+Mutex::~Mutex() {
+  CHECK_EQ(atomic_load(&state_, memory_order_relaxed), kUnlocked);  // Free.
+}
+
+void Mutex::Lock() {
+#if TSAN_DEBUG
+  cur_thread()->deadlock_detector.Lock(type_);
+#endif
+  uptr cmp = kUnlocked;
+  if (atomic_compare_exchange_strong(&state_, &cmp, kWriteLock,
+                                     memory_order_acquire))
+    return;  // Fast path: the mutex was free.
+  for (Backoff backoff; backoff.Do();) {  // Do() always returns true.
+    if (atomic_load(&state_, memory_order_relaxed) == kUnlocked) {
+      cmp = kUnlocked;  // Reset: a failed exchange may have overwritten it.
+      if (atomic_compare_exchange_weak(&state_, &cmp, kWriteLock,
+                                       memory_order_acquire)) {
+#if TSAN_COLLECT_STATS
+        StatInc(cur_thread(), stat_type_, backoff.Contention());
+#endif
+        return;
+      }
+    }
+  }
+}
+
+void Mutex::Unlock() {
+  uptr prev = atomic_fetch_sub(&state_, kWriteLock, memory_order_release);
+  (void)prev;  // Only read by the DCHECK below.
+  DCHECK_NE(prev & kWriteLock, 0);  // The write bit must have been set.
+#if TSAN_DEBUG
+  cur_thread()->deadlock_detector.Unlock(type_);
+#endif
+}
+
+void Mutex::ReadLock() {
+#if TSAN_DEBUG
+  cur_thread()->deadlock_detector.Lock(type_);
+#endif
+  uptr prev = atomic_fetch_add(&state_, kReadLock, memory_order_acquire);
+  if ((prev & kWriteLock) == 0)
+    return;  // No writer held it; the reader count is already bumped.
+  for (Backoff backoff; backoff.Do();) {  // Wait for the write bit to clear.
+    prev = atomic_load(&state_, memory_order_acquire);
+    if ((prev & kWriteLock) == 0) {
+#if TSAN_COLLECT_STATS
+      StatInc(cur_thread(), stat_type_, backoff.Contention());
+#endif
+      return;
+    }
+  }
+}
+
+void Mutex::ReadUnlock() {
+  uptr prev = atomic_fetch_sub(&state_, kReadLock, memory_order_release);
+  (void)prev;  // Only read by the DCHECKs below.
+  DCHECK_EQ(prev & kWriteLock, 0);  // No writer may hold it alongside us.
+  DCHECK_GT(prev & ~kWriteLock, 0);  // At least one reader was counted.
+#if TSAN_DEBUG
+  cur_thread()->deadlock_detector.Unlock(type_);
+#endif
+}
+
+Lock::Lock(Mutex *m)
+  : m_(m) {
+  m_->Lock();  // Held until the destructor runs.
+}
+
+Lock::~Lock() {
+  m_->Unlock();  // Releases the write lock taken by the constructor.
+}
+
+ReadLock::ReadLock(Mutex *m)
+  : m_(m) {
+  m_->ReadLock();  // Held until the destructor runs.
+}
+
+ReadLock::~ReadLock() {
+  m_->ReadUnlock();  // Releases the read lock taken by the constructor.
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_mutex.h b/lib/tsan/rtl/tsan_mutex.h
new file mode 100644
index 0000000..2180978
--- /dev/null
+++ b/lib/tsan/rtl/tsan_mutex.h
@@ -0,0 +1,98 @@
+//===-- tsan_mutex.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_MUTEX_H
+#define TSAN_MUTEX_H
+
+#include "tsan_atomic.h"
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+enum MutexType {
+  MutexTypeInvalid,  // Value 0; Mutex's constructor rejects it.
+  MutexTypeTrace,
+  MutexTypeThreads,
+  MutexTypeReport,
+  MutexTypeSyncVar,
+  MutexTypeSyncTab,
+  MutexTypeSlab,
+  MutexTypeAnnotations,
+  MutexTypeAtExit,
+
+  // This must be the last.
+  MutexTypeCount,
+};
+
+class Mutex {
+ public:
+  explicit Mutex(MutexType type, StatType stat_type);
+  ~Mutex();
+
+  void Lock();  // Exclusive (writer) lock.
+  void Unlock();
+
+  void ReadLock();  // Shared (reader) lock.
+  void ReadUnlock();
+
+ private:
+  atomic_uintptr_t state_;  // Lock word updated atomically by all methods.
+#if TSAN_DEBUG
+  MutexType type_;  // Present in debug builds only.
+#endif
+#if TSAN_COLLECT_STATS
+  StatType stat_type_;  // Present only when stats are collected.
+#endif
+
+  Mutex(const Mutex&);  // Declared only: Mutex is not copyable.
+  void operator = (const Mutex&);
+};
+
+class Lock {  // Scoped guard that pairs a lock call with its unlock.
+ public:
+  explicit Lock(Mutex *m);
+  ~Lock();
+
+ private:
+  Mutex *m_;  // Not owned.
+
+  Lock(const Lock&);  // Declared only: guards are not copyable.
+  void operator = (const Lock&);
+};
+
+class ReadLock {  // Scoped guard for the shared (reader) side of a Mutex.
+ public:
+  explicit ReadLock(Mutex *m);
+  ~ReadLock();
+
+ private:
+  Mutex *m_;  // Not owned.
+
+  ReadLock(const ReadLock&);  // Declared only: guards are not copyable.
+  void operator = (const ReadLock&);
+};
+
+class DeadlockDetector {
+ public:
+  DeadlockDetector();
+  void Lock(MutexType t);
+  void Unlock(MutexType t);
+ private:
+  u64 seq_;  // Lock sequence counter.
+  u64 locked_[MutexTypeCount];  // One slot per mutex type.
+};
+
+void InitializeMutex();
+
+}  // namespace __tsan
+
+#endif  // TSAN_MUTEX_H
diff --git a/lib/tsan/rtl/tsan_placement_new.h b/lib/tsan/rtl/tsan_placement_new.h
new file mode 100644
index 0000000..7b8ba03
--- /dev/null
+++ b/lib/tsan/rtl/tsan_placement_new.h
@@ -0,0 +1,24 @@
+//===-- tsan_placement_new.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// The file provides 'placement new'
+// Do not include it into header files, only into source files.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_PLACEMENT_NEW_H
+#define TSAN_PLACEMENT_NEW_H
+
+#include "tsan_defs.h"
+
+inline void *operator new(__tsan::uptr sz, void *p) {  // Placement form.
+  return p;  // No allocation: the object is constructed at p.
+}
+
+#endif  // TSAN_PLACEMENT_NEW_H
diff --git a/lib/tsan/rtl/tsan_platform.h b/lib/tsan/rtl/tsan_platform.h
new file mode 100644
index 0000000..9cfe476
--- /dev/null
+++ b/lib/tsan/rtl/tsan_platform.h
@@ -0,0 +1,93 @@
+//===-- tsan_platform.h -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Platform-specific code.
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_LINUX_H
+#define TSAN_LINUX_H
+#ifdef __linux__
+
+#include "tsan_rtl.h"
+
+#if __LP64__
+namespace __tsan {
+
+// TSAN_COMPAT_SHADOW is intended for COMPAT virtual memory layout,
+// when memory addresses are of the 0x2axxxxxxxxxx form.
+// The option is enabled with 'setarch x86_64 -L'.
+#if defined(TSAN_COMPAT_SHADOW) && TSAN_COMPAT_SHADOW
+
+static const uptr kLinuxAppMemBeg = 0x2a0000000000ULL;
+static const uptr kLinuxAppMemEnd = 0x7fffffffffffULL;
+
+#else
+
+static const uptr kLinuxAppMemBeg = 0x7ef000000000ULL;
+static const uptr kLinuxAppMemEnd = 0x7fffffffffffULL;
+
+#endif
+
+static const uptr kLinuxAppMemMsk = 0x7c0000000000ULL;
+
+// This has to be a macro to allow constant initialization of constants below.
+#define MemToShadow(addr) \
+    (((addr) & ~(kLinuxAppMemMsk | (kShadowCell - 1))) * kShadowCnt)
+
+static const uptr kLinuxShadowBeg = MemToShadow(kLinuxAppMemBeg);
+static const uptr kLinuxShadowEnd =
+  MemToShadow(kLinuxAppMemEnd) | (kPageSize - 1);
+
+static inline bool IsAppMem(uptr mem) {  // Inclusive range test.
+  return !(mem < kLinuxAppMemBeg) && !(mem > kLinuxAppMemEnd);
+}
+
+static inline bool IsShadowMem(uptr mem) {  // Inclusive range test.
+  return !(mem < kLinuxShadowBeg) && !(mem > kLinuxShadowEnd);
+}
+
+static inline uptr ShadowToMem(uptr shadow) {
+  CHECK(IsShadowMem(shadow));  // Rejects addresses outside the shadow range.
+#if defined(TSAN_COMPAT_SHADOW) && TSAN_COMPAT_SHADOW
+  // COMPAT mapping is not quite one-to-one.
+  return (shadow / kShadowCnt) | 0x280000000000ULL;
+#else
+  return (shadow / kShadowCnt) | kLinuxAppMemMsk;  // Undo scale, restore mask.
+#endif
+}
+
+const char *InitializePlatform();
+void FinalizePlatform();
+int GetPid();
+
+void sched_yield();
+
+typedef int fd_t;
+const fd_t kInvalidFd = -1;
+fd_t internal_open(const char *name, bool write);
+void internal_close(fd_t fd);
+uptr internal_filesize(fd_t fd);  // -1 on error.
+uptr internal_read(fd_t fd, void *p, uptr size);
+uptr internal_write(fd_t fd, const void *p, uptr size);
+const char *internal_getpwd();
+
+uptr GetTlsSize();
+void GetThreadStackAndTls(uptr *stk_addr, uptr *stk_size,
+                          uptr *tls_addr, uptr *tls_size);
+
+}  // namespace __tsan
+
+#else  // __LP64__
+# error "Only 64-bit is supported"
+#endif
+
+#endif  // __linux__
+#endif  // TSAN_LINUX_H
diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
new file mode 100644
index 0000000..171c348
--- /dev/null
+++ b/lib/tsan/rtl/tsan_platform_linux.cc
@@ -0,0 +1,250 @@
+//===-- tsan_platform_linux.cc ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Linux-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+#include "tsan_flags.h"
+
+#include <asm/prctl.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+#include <dlfcn.h>
+
+extern "C" int arch_prctl(int code, __tsan::uptr *addr);
+
+namespace __tsan {
+
+static uptr g_tls_size;
+
+ScopedInRtl::ScopedInRtl()
+    : thr_(cur_thread()) {
+  in_rtl_ = thr_->in_rtl;
+  thr_->in_rtl++;
+  errno_ = errno;
+}
+
+ScopedInRtl::~ScopedInRtl() {
+  thr_->in_rtl--;
+  errno = errno_;
+  CHECK_EQ(in_rtl_, thr_->in_rtl);
+}
+
+void Die() {
+  _exit(1);
+}
+
+static void *my_mmap(void *addr, size_t length, int prot, int flags,
+                    int fd, u64 offset) {
+  ScopedInRtl in_rtl;
+# if __WORDSIZE == 64
+  return (void *)syscall(__NR_mmap, addr, length, prot, flags, fd, offset);
+# else
+  return (void *)syscall(__NR_mmap2, addr, length, prot, flags, fd, offset);
+# endif
+}
+
+void sched_yield() {
+  ScopedInRtl in_rtl;
+  syscall(__NR_sched_yield);
+}
+
+fd_t internal_open(const char *name, bool write) {
+  ScopedInRtl in_rtl;
+  return syscall(__NR_open, name,
+      write ? O_WRONLY | O_CREAT | O_CLOEXEC : O_RDONLY, 0660);
+}
+
+void internal_close(fd_t fd) {
+  ScopedInRtl in_rtl;
+  syscall(__NR_close, fd);
+}
+
+uptr internal_filesize(fd_t fd) {
+  struct stat st = {};
+  if (syscall(__NR_fstat, fd, &st))
+    return -1;
+  return (uptr)st.st_size;
+}
+
+uptr internal_read(fd_t fd, void *p, uptr size) {
+  ScopedInRtl in_rtl;
+  return syscall(__NR_read, fd, p, size);
+}
+
+uptr internal_write(fd_t fd, const void *p, uptr size) {
+  ScopedInRtl in_rtl;
+  return syscall(__NR_write, fd, p, size);
+}
+
+const char *internal_getpwd() {
+  return getenv("PWD");
+}
+
+static void ProtectRange(uptr beg, uptr end) {
+  ScopedInRtl in_rtl;
+  CHECK_LE(beg, end);
+  if (beg == end)
+    return;
+  if (beg != (uptr)my_mmap((void*)(beg), end - beg,
+      PROT_NONE,
+      MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE,
+      -1, 0)) {
+    Printf("FATAL: ThreadSanitizer can not protect [%lx,%lx]\n", beg, end);
+    Printf("FATAL: Make sure you are not using unlimited stack\n");
+    Die();
+  }
+}
+
+// Maps the shadow region and makes the gaps below and above it inaccessible.
+void InitializeShadowMemory() {
+  const uptr kClosedLowBeg  = 0x200000;
+  const uptr kClosedLowEnd  = kLinuxShadowBeg - 1;
+  const uptr kClosedMidBeg = kLinuxShadowEnd + 1;
+  const uptr kClosedMidEnd = kLinuxAppMemBeg - 1;
+  uptr shadow = (uptr)my_mmap((void*)kLinuxShadowBeg,
+      kLinuxShadowEnd - kLinuxShadowBeg,
+      PROT_READ | PROT_WRITE,
+      MAP_PRIVATE | MAP_ANON | MAP_FIXED | MAP_NORESERVE, -1, 0);
+  if (shadow != kLinuxShadowBeg) {
+    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
+    Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
+    Die();
+  }
+  ProtectRange(kClosedLowBeg, kClosedLowEnd);
+  ProtectRange(kClosedMidBeg, kClosedMidEnd);
+  DPrintf("kClosedLow   %lx-%lx (%luGB)\n",
+      kClosedLowBeg, kClosedLowEnd, (kClosedLowEnd - kClosedLowBeg) >> 30);
+  DPrintf("kLinuxShadow %lx-%lx (%luGB)\n",
+      kLinuxShadowBeg, kLinuxShadowEnd,
+      (kLinuxShadowEnd - kLinuxShadowBeg) >> 30);
+  DPrintf("kClosedMid   %lx-%lx (%luGB)\n",
+      kClosedMidBeg, kClosedMidEnd, (kClosedMidEnd - kClosedMidBeg) >> 30);
+  DPrintf("kLinuxAppMem %lx-%lx (%luGB)\n",
+      kLinuxAppMemBeg, kLinuxAppMemEnd,
+      (kLinuxAppMemEnd - kLinuxAppMemBeg) >> 30);
+  DPrintf("stack        %lx\n", (uptr)&shadow);
+}
+
+static void CheckPIE() {
+  // Ensure that the binary is indeed compiled with -pie.
+  fd_t fmaps = internal_open("/proc/self/maps", false);
+  if (fmaps == kInvalidFd)
+    return;
+  char buf[20];
+  if (internal_read(fmaps, buf, sizeof(buf)) == sizeof(buf)) {
+    buf[sizeof(buf) - 1] = 0;
+    u64 addr = strtoll(buf, 0, 16);
+    if ((u64)addr < kLinuxAppMemBeg) {
+      Printf("FATAL: ThreadSanitizer can not mmap the shadow memory ("
+             "something is mapped at 0x%llx < 0x%lx)\n",
+             addr, kLinuxAppMemBeg);
+      Printf("FATAL: Make sure to compile with -fPIE"
+             " and to link with -pie.\n");
+      Die();
+    }
+  }
+  internal_close(fmaps);
+}
+
+#ifdef __i386__
+# define INTERNAL_FUNCTION __attribute__((regparm(3), stdcall))
+#else
+# define INTERNAL_FUNCTION
+#endif
+extern "C" void _dl_get_tls_static_info(size_t*, size_t*)
+    __attribute__((weak)) INTERNAL_FUNCTION;
+
+static int InitTlsSize() {
+  typedef void (*get_tls_func)(size_t*, size_t*) INTERNAL_FUNCTION;
+  get_tls_func get_tls = &_dl_get_tls_static_info;
+  if (get_tls == 0)
+    get_tls = (get_tls_func)dlsym(RTLD_NEXT, "_dl_get_tls_static_info");
+  CHECK_NE(get_tls, 0);
+  size_t tls_size = 0;
+  size_t tls_align = 0;
+  get_tls(&tls_size, &tls_align);
+  return tls_size;
+}
+
+const char *InitializePlatform() {
+  void *p = 0;
+  if (sizeof(p) == 8) {
+    // Disable core dumps, dumping of 16TB usually takes a bit long.
+    // The following magic is to prevent clang from replacing it with memset.
+    volatile rlimit lim;
+    lim.rlim_cur = 0;
+    lim.rlim_max = 0;
+    setrlimit(RLIMIT_CORE, (rlimit*)&lim);
+  }
+
+  CheckPIE();
+  g_tls_size = (uptr)InitTlsSize();
+  return getenv("TSAN_OPTIONS");
+}
+
+void FinalizePlatform() {
+  fflush(0);
+}
+
+uptr GetTlsSize() {
+  return g_tls_size;
+}
+
+void GetThreadStackAndTls(uptr *stk_addr, uptr *stk_size,
+                          uptr *tls_addr, uptr *tls_size) {
+  *stk_addr = 0;
+  *stk_size = 0;
+  pthread_attr_t attr;
+  if (pthread_getattr_np(pthread_self(), &attr) == 0) {
+    pthread_attr_getstack(&attr, (void**)stk_addr, (size_t*)stk_size);
+    pthread_attr_destroy(&attr);
+  }
+  arch_prctl(ARCH_GET_FS, tls_addr);
+  *tls_addr -= g_tls_size;
+  *tls_size = g_tls_size;
+
+  // If stack and tls intersect, make them non-intersecting.
+  if (*tls_addr > *stk_addr && *tls_addr < *stk_addr + *stk_size) {
+    CHECK_GT(*tls_addr + *tls_size, *stk_addr);
+    CHECK_LE(*tls_addr + *tls_size, *stk_addr + *stk_size);
+    *stk_size = RoundUp(*tls_addr - *stk_addr, kPageSize);
+    uptr stk_end = *stk_addr + *stk_size;
+    if (stk_end > *tls_addr) {
+      uptr overlap = stk_end - *tls_addr;  // TLS bytes now inside the stack.
+      *tls_size = overlap < *tls_size ? *tls_size - overlap : 0;
+      *tls_addr = stk_end;
+    }
+  }
+}
+
+int GetPid() {
+  return getpid();
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_printf.cc b/lib/tsan/rtl/tsan_printf.cc
new file mode 100644
index 0000000..96c4d24
--- /dev/null
+++ b/lib/tsan/rtl/tsan_printf.cc
@@ -0,0 +1,147 @@
+//===-- tsan_printf.cc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_defs.h"
+#include "tsan_mman.h"
+#include "tsan_platform.h"
+
+#include <stdarg.h>  // va_list
+
+typedef long long i64;  // NOLINT
+typedef long iptr;  // NOLINT
+
+namespace __tsan {
+
+static int AppendChar(char **buff, const char *buff_end, char c) {
+  if (*buff < buff_end) {
+    **buff = c;
+    (*buff)++;
+  }
+  return 1;
+}
+
+static int AppendUnsigned(char **buff, const char *buff_end, u64 num,
+                          int base, uptr minimal_num_length) {
+  uptr const kMaxLen = 30;
+  uptr num_buffer[kMaxLen];
+  uptr pos = 0;
+  do {
+    num_buffer[pos++] = num % base;
+    num /= base;
+  } while (num > 0);
+  while (pos < minimal_num_length) num_buffer[pos++] = 0;
+  int result = 0;
+  while (pos-- > 0) {
+    uptr digit = num_buffer[pos];
+    result += AppendChar(buff, buff_end, (digit < 10) ? '0' + digit
+                                                      : 'a' + digit - 10);
+  }
+  return result;
+}
+
+// Appends num in decimal, '-' first if negative; returns the text length.
+static int AppendSignedDecimal(char **buff, const char *buff_end, i64 num) {
+  if (num >= 0)
+    return AppendUnsigned(buff, buff_end, (u64)num, 10, 0);
+  // Negate as unsigned: -num overflows for the minimum i64 value.
+  int result = AppendChar(buff, buff_end, '-');
+  result += AppendUnsigned(buff, buff_end, 0 - (u64)num, 10, 0);
+  return result;
+}
+
+static int AppendString(char **buff, const char *buff_end, const char *s) {
+  if (s == 0)
+    s = "<null>";
+  int result = 0;
+  for (; *s; s++) {
+    result += AppendChar(buff, buff_end, *s);
+  }
+  return result;
+}
+
+static int AppendPointer(char **buff, const char *buff_end, u64 ptr_value) {
+  int result = 0;
+  result += AppendString(buff, buff_end, "0x");
+  result += AppendUnsigned(buff, buff_end, ptr_value, 16,
+      (sizeof(void*) == 8) ? 12 : 8);  // NOLINT
+  return result;
+}
+
+static uptr VSNPrintf(char *buff, int buff_length,
+                     const char *format, va_list args) {
+  const char *buff_end = &buff[buff_length - 1];
+  const char *cur = format;
+  int result = 0;
+  for (; *cur; cur++) {
+    if (*cur != '%') {
+      result += AppendChar(&buff, buff_end, *cur);
+      continue;
+    }
+    cur++;
+    bool is_long = (*cur == 'l');
+    cur += is_long;
+    bool is_llong = (*cur == 'l');
+    cur += is_llong;
+    switch (*cur) {
+      case 'd': {
+        i64 v = is_llong ? va_arg(args, i64)
+            : is_long ? va_arg(args, iptr)
+            : va_arg(args, int);
+        result += AppendSignedDecimal(&buff, buff_end, v);
+        break;
+      }
+      case 'u':
+      case 'x': {
+        u64 v = is_llong ? va_arg(args, u64)
+            : is_long ? va_arg(args, uptr)
+            : va_arg(args, unsigned);
+        result += AppendUnsigned(&buff, buff_end, v, *cur == 'u' ? 10: 16, 0);
+        break;
+      }
+      case 'p': {
+        result += AppendPointer(&buff, buff_end, va_arg(args, uptr));
+        break;
+      }
+      case 's': {
+        result += AppendString(&buff, buff_end, va_arg(args, char*));
+        break;
+      }
+      default: {
+        Die();
+      }
+    }
+  }
+  AppendChar(&buff, buff_end + 1, '\0');
+  return result;
+}
+
+void Printf(const char *format, ...) {
+  ScopedInRtl in_rtl;
+  const uptr kMaxLen = 16 * 1024;
+  InternalScopedBuf<char> buffer(kMaxLen);
+  va_list args;
+  va_start(args, format);
+  uptr len = VSNPrintf(buffer, buffer.Size(), format, args);
+  va_end(args);
+  internal_write(CTX() ? flags()->log_fileno : 2,
+      buffer, len < buffer.Size() ? len : buffer.Size() - 1);
+}
+
+uptr Snprintf(char *buffer, uptr length, const char *format, ...) {
+  va_list args;
+  va_start(args, format);
+  uptr len = VSNPrintf(buffer, length, format, args);
+  va_end(args);
+  return len;
+}
+}
diff --git a/lib/tsan/rtl/tsan_report.cc b/lib/tsan/rtl/tsan_report.cc
new file mode 100644
index 0000000..f231ed0
--- /dev/null
+++ b/lib/tsan/rtl/tsan_report.cc
@@ -0,0 +1,124 @@
+//===-- tsan_report.cc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_report.h"
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+ReportDesc::ReportDesc()
+    : stacks(MBlockReportStack)
+    , mops(MBlockReportMop)
+    , locs(MBlockReportLoc)
+    , mutexes(MBlockReportMutex)
+    , threads(MBlockReportThread) {
+}
+
+ReportDesc::~ReportDesc() {
+}
+
+// Prints the report banner: warning prefix, report kind, and process id.
+static void PrintHeader(ReportType typ) {
+  Printf("WARNING: ThreadSanitizer: ");
+  switch (typ) {
+    case ReportTypeRace: Printf("data race"); break;
+    case ReportTypeThreadLeak: Printf("thread leak"); break;
+    case ReportTypeMutexDestroyLocked:
+      Printf("destroy of a locked mutex"); break;
+    case ReportTypeSignalUnsafe:
+      Printf("signal-unsafe call inside of a signal"); break;
+    default: break;  // Unknown kinds print no description.
+  }
+  Printf(" (pid=%d)\n", GetPid());
+}
+
+static void PrintStack(const ReportStack *ent) {
+  for (int i = 0; ent; ent = ent->next, i++) {
+    Printf("    #%d %s %s:%d", i, ent->func, ent->file, ent->line);
+    if (ent->col)
+      Printf(":%d", ent->col);
+    if (ent->module && ent->offset)
+      Printf(" (%s+%p)\n", ent->module, (void*)ent->offset);
+    else
+      Printf(" (%p)\n", (void*)ent->pc);
+  }
+}
+
+static void PrintMop(const ReportMop *mop, bool first) {
+  Printf("  %s of size %d at %p",
+      (first ? (mop->write ? "Write" : "Read")
+             : (mop->write ? "Previous write" : "Previous read")),
+      mop->size, (void*)mop->addr);
+  if (mop->tid == 0)
+    Printf(" by main thread:\n");
+  else
+    Printf(" by thread %d:\n", mop->tid);
+  PrintStack(mop->stack);
+}
+
+static void PrintLocation(const ReportLocation *loc) {
+  if (loc->type == ReportLocationGlobal) {
+    Printf("  Location is global '%s' of size %lu at %lx %s:%d\n",
+           loc->name, loc->size, loc->addr, loc->file, loc->line);
+  } else if (loc->type == ReportLocationHeap) {
+    Printf("  Location is heap of size %lu at %lx allocated by thread %d:\n",
+           loc->size, loc->addr, loc->tid);
+    PrintStack(loc->stack);
+  } else if (loc->type == ReportLocationStack) {
+    Printf("  Location is stack of thread %d:\n", loc->tid);
+  }
+}
+
+static void PrintMutex(const ReportMutex *rm) {
+  if (rm->stack == 0)
+    return;
+  Printf("  Mutex %d created at:\n", rm->id);
+  PrintStack(rm->stack);
+}
+
+static void PrintThread(const ReportThread *rt) {
+  if (rt->id == 0)  // Little sense in describing the main thread.
+    return;
+  Printf("  Thread %d", rt->id);
+  if (rt->name)
+    Printf(" '%s'", rt->name);
+  Printf(" (%s)", rt->running ? "running" : "finished");
+  if (rt->stack)
+    Printf(" created at:");
+  Printf("\n");
+  PrintStack(rt->stack);
+}
+
+void PrintReport(const ReportDesc *rep) {
+  Printf("==================\n");
+  PrintHeader(rep->typ);
+
+  for (uptr i = 0; i < rep->stacks.Size(); i++)
+    PrintStack(rep->stacks[i]);
+
+  for (uptr i = 0; i < rep->mops.Size(); i++)
+    PrintMop(rep->mops[i], i == 0);
+
+  for (uptr i = 0; i < rep->locs.Size(); i++)
+    PrintLocation(rep->locs[i]);
+
+  for (uptr i = 0; i < rep->mutexes.Size(); i++)
+    PrintMutex(rep->mutexes[i]);
+
+  for (uptr i = 0; i < rep->threads.Size(); i++)
+    PrintThread(rep->threads[i]);
+
+  Printf("==================\n");
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_report.h b/lib/tsan/rtl/tsan_report.h
new file mode 100644
index 0000000..70c5932
--- /dev/null
+++ b/lib/tsan/rtl/tsan_report.h
@@ -0,0 +1,100 @@
+//===-- tsan_report.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_REPORT_H
+#define TSAN_REPORT_H
+
+#include "tsan_defs.h"
+#include "tsan_vector.h"
+
+namespace __tsan {
+
+enum ReportType {
+  ReportTypeRace,
+  ReportTypeThreadLeak,
+  ReportTypeMutexDestroyLocked,
+  ReportTypeSignalUnsafe,
+};
+
+struct ReportStack {
+  ReportStack *next;
+  char *module;
+  uptr offset;
+  uptr pc;
+  char *func;
+  char *file;
+  int line;
+  int col;
+};
+
+struct ReportMop {
+  int tid;
+  uptr addr;
+  int size;
+  bool write;
+  int nmutex;
+  int *mutex;
+  ReportStack *stack;
+};
+
+enum ReportLocationType {
+  ReportLocationGlobal,
+  ReportLocationHeap,
+  ReportLocationStack,
+};
+
+struct ReportLocation {
+  ReportLocationType type;
+  uptr addr;
+  uptr size;
+  int tid;
+  char *name;
+  char *file;
+  int line;
+  ReportStack *stack;
+};
+
+struct ReportThread {
+  int id;
+  bool running;
+  char *name;
+  ReportStack *stack;
+};
+
+struct ReportMutex {
+  int id;
+  ReportStack *stack;
+};
+
+class ReportDesc {
+ public:
+  ReportType typ;
+  Vector<ReportStack*> stacks;
+  Vector<ReportMop*> mops;
+  Vector<ReportLocation*> locs;
+  Vector<ReportMutex*> mutexes;
+  Vector<ReportThread*> threads;
+
+  ReportDesc();
+  ~ReportDesc();
+
+ private:
+  ReportDesc(const ReportDesc&);
+  void operator = (const ReportDesc&);
+};
+
+// Format and output the report to the console/log. No additional logic.
+void PrintReport(const ReportDesc *rep);
+
+}  // namespace __tsan
+
+#endif  // TSAN_REPORT_H
diff --git a/lib/tsan/rtl/tsan_rtl.cc b/lib/tsan/rtl/tsan_rtl.cc
new file mode 100644
index 0000000..0e04c44
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl.cc
@@ -0,0 +1,460 @@
+//===-- tsan_rtl.cc ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Main file (entry points) for the TSan run-time.
+//===----------------------------------------------------------------------===//
+
+#include "tsan_defs.h"
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+#include "tsan_interface.h"
+#include "tsan_atomic.h"
+#include "tsan_mman.h"
+#include "tsan_placement_new.h"
+#include "tsan_suppressions.h"
+
+volatile int __tsan_stop = 0;
+
+extern "C" void __tsan_resume() {
+  __tsan_stop = 0;
+}
+
+namespace __tsan {
+
+THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGN(64);
+static char ctx_placeholder[sizeof(Context)] ALIGN(64);
+
+static Context *ctx;
+Context *CTX() {
+  return ctx;
+}
+
+Context::Context()
+  : initialized()
+  , report_mtx(MutexTypeReport, StatMtxReport)
+  , nreported()
+  , nmissed_expected()
+  , thread_mtx(MutexTypeThreads, StatMtxThreads)
+  , racy_stacks(MBlockRacyStacks)
+  , racy_addresses(MBlockRacyAddresses) {
+}
+
+// The objects are allocated in TLS, so one may rely on zero-initialization.
+ThreadState::ThreadState(Context *ctx, int tid, u64 epoch,
+                         uptr stk_addr, uptr stk_size,
+                         uptr tls_addr, uptr tls_size)
+  : fast_state(tid, epoch)
+  // Do not touch these, rely on zero initialization,
+  // they may be accessed before the ctor.
+  // , fast_ignore_reads()
+  // , fast_ignore_writes()
+  // , in_rtl()
+  , shadow_stack_pos(&shadow_stack[0])
+  , tid(tid)
+  , func_call_count()
+  , stk_addr(stk_addr)
+  , stk_size(stk_size)
+  , tls_addr(tls_addr)
+  , tls_size(tls_size) {
+}
+
+ThreadContext::ThreadContext(int tid)
+  : tid(tid)
+  , unique_id()
+  , user_id()
+  , thr()
+  , status(ThreadStatusInvalid)
+  , detached()
+  , reuse_count()
+  , epoch0()
+  , epoch1()
+  , dead_next() {
+}
+
+void Initialize(ThreadState *thr) {
+  // Thread safe because done before all threads exist.
+  static bool is_initialized = false;
+  if (is_initialized)
+    return;
+  is_initialized = true;
+  ScopedInRtl in_rtl;
+  InitializeInterceptors();
+  const char *env = InitializePlatform();
+  InitializeMutex();
+  InitializeDynamicAnnotations();
+  ctx = new(ctx_placeholder) Context;
+  InitializeShadowMemory();
+  ctx->dead_list_size = 0;
+  ctx->dead_list_head = 0;
+  ctx->dead_list_tail = 0;
+  InitializeFlags(&ctx->flags, env);
+  InitializeSuppressions();
+
+  if (ctx->flags.verbosity)
+    Printf("***** Running under ThreadSanitizer v2 (pid=%d) *****\n", GetPid());
+
+  // Initialize thread 0.
+  ctx->thread_seq = 0;
+  int tid = ThreadCreate(thr, 0, 0, true);
+  CHECK_EQ(tid, 0);
+  ThreadStart(thr, tid);
+  CHECK_EQ(thr->in_rtl, 1);
+  ctx->initialized = true;
+
+  if (__tsan_stop) {
+    Printf("ThreadSanitizer is suspended at startup.\n");
+    while (__tsan_stop);
+  }
+}
+
+int Finalize(ThreadState *thr) {
+  ScopedInRtl in_rtl;
+  Context *ctx = __tsan::ctx;
+  bool failed = false;
+
+  // Be very careful beyond that point.
+  // All bets are off. Everything is destroyed.
+  ThreadFinish(thr);
+  ThreadFinalize(thr);
+  FinalizeFlags(&ctx->flags);
+
+  if (ctx->nreported) {
+    failed = true;
+    Printf("ThreadSanitizer: reported %d warnings\n", ctx->nreported);
+  }
+
+  if (ctx->nmissed_expected) {
+    failed = true;
+    Printf("ThreadSanitizer: missed %d expected races\n",
+        ctx->nmissed_expected);
+  }
+
+  StatOutput(ctx->stat);
+  FinalizeSuppressions();
+  FinalizePlatform();
+
+  const int exitcode = failed ? flags()->exitcode : 0;
+  const int log_fileno = flags()->log_fileno;
+  __tsan::ctx->~Context();
+  __tsan::ctx = 0;
+
+  InternalAllocStatAggregate(ctx, thr);
+
+  for (int i = 0; i < (int)MBlockTypeCount; i++) {
+    if (ctx->int_alloc_cnt[i] == 0 && ctx->int_alloc_siz[i] == 0)
+      continue;
+    InternalScopedBuf<char> tmp(1024);
+    Snprintf(tmp, tmp.Size(), "ThreadSanitizer: Internal memory leak: "
+        "type=%d count=%lld size=%lld\n",
+        (int)i, ctx->int_alloc_cnt[i], ctx->int_alloc_siz[i]);
+    internal_write(log_fileno, tmp, internal_strlen(tmp));
+  }
+
+  return exitcode;
+}
+
+static void TraceSwitch(ThreadState *thr) {
+  ScopedInRtl in_rtl;
+  Lock l(&thr->trace.mtx);
+  unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % kTraceParts;
+  TraceHeader *hdr = &thr->trace.headers[trace];
+  hdr->epoch0 = thr->fast_state.epoch();
+  hdr->stack0.ObtainCurrent(thr, 0);
+}
+
+extern "C" void __tsan_trace_switch() {
+  TraceSwitch(cur_thread());
+}
+
+extern "C" void __tsan_report_race() {
+  ReportRace(cur_thread());
+}
+
+ALWAYS_INLINE
+static Shadow LoadShadow(u64 *p) {
+  u64 raw = atomic_load((atomic_uint64_t*)p, memory_order_relaxed);
+  return Shadow(raw);
+}
+
+ALWAYS_INLINE
+static void StoreShadow(u64 *sp, u64 s) {
+  atomic_store((atomic_uint64_t*)sp, s, memory_order_relaxed);
+}
+
+ALWAYS_INLINE
+static void StoreIfNotYetStored(u64 *sp, u64 *s) {
+  StoreShadow(sp, *s);
+  *s = 0;
+}
+
+static inline void HandleRace(ThreadState *thr, u64 *shadow_mem,
+                              Shadow cur, Shadow old) {
+  thr->racy_state[0] = cur.raw();
+  thr->racy_state[1] = old.raw();
+  thr->racy_shadow_addr = shadow_mem;
+  HACKY_CALL(__tsan_report_race);
+}
+
+static inline bool BothReads(Shadow s, int kAccessIsWrite) {
+  return !kAccessIsWrite && !s.is_write();
+}
+
+static inline bool OldIsRWStronger(Shadow old, int kAccessIsWrite) {
+  return old.is_write() || !kAccessIsWrite;
+}
+
+static inline bool OldIsRWWeaker(Shadow old, int kAccessIsWrite) {
+  return !old.is_write() || kAccessIsWrite;
+}
+
+static inline bool OldIsInSameSynchEpoch(Shadow old, ThreadState *thr) {
+  return old.epoch() >= thr->fast_synch_epoch;
+}
+
+static inline bool HappensBefore(Shadow old, ThreadState *thr) {
+  return thr->clock.get(old.tid()) >= old.epoch();
+}
+
+ALWAYS_INLINE
+void MemoryAccessImpl(ThreadState *thr, uptr addr,
+    int kAccessSizeLog, bool kAccessIsWrite, FastState fast_state,
+    u64 *shadow_mem, Shadow cur) {
+  StatInc(thr, StatMop);
+  StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+  StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+
+  // This potentially can live in an MMX/SSE scratch register.
+  // The required intrinsics are:
+  // __m128i _mm_move_epi64(__m128i*);
+  // _mm_storel_epi64(u64*, __m128i);
+  u64 store_word = cur.raw();
+
+  // scan all the shadow values and dispatch to 4 categories:
+  // same, replace, candidate and race (see comments below).
+  // we consider only 3 cases regarding access sizes:
+  // equal, intersect and not intersect. initially I considered
+  // larger and smaller as well, it allowed to replace some
+  // 'candidates' with 'same' or 'replace', but I think
+  // it's just not worth it (performance- and complexity-wise).
+
+  Shadow old(0);
+  if (kShadowCnt == 1) {
+    int idx = 0;
+#include "tsan_update_shadow_word_inl.h"
+  } else if (kShadowCnt == 2) {
+    int idx = 0;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 1;
+#include "tsan_update_shadow_word_inl.h"
+  } else if (kShadowCnt == 4) {
+    int idx = 0;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 1;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 2;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 3;
+#include "tsan_update_shadow_word_inl.h"
+  } else if (kShadowCnt == 8) {
+    int idx = 0;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 1;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 2;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 3;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 4;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 5;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 6;
+#include "tsan_update_shadow_word_inl.h"
+    idx = 7;
+#include "tsan_update_shadow_word_inl.h"
+  } else {
+    CHECK(false);
+  }
+
+  // we did not find any races and had already stored
+  // the current access info, so we are done
+  if (LIKELY(store_word == 0))
+    return;
+  // choose a random candidate slot and replace it
+  StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
+  StatInc(thr, StatShadowReplace);
+  return;
+ RACE:
+  HandleRace(thr, shadow_mem, cur, old);
+  return;
+}
+
+ALWAYS_INLINE
+void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
+    int kAccessSizeLog, bool kAccessIsWrite) {
+  u64 *shadow_mem = (u64*)MemToShadow(addr);
+  DPrintf2("#%d: tsan::OnMemoryAccess: @%p %p size=%d"
+      " is_write=%d shadow_mem=%p {%llx, %llx, %llx, %llx}\n",
+      (int)thr->fast_state.tid(), (void*)pc, (void*)addr,
+      (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
+      shadow_mem[0], shadow_mem[1], shadow_mem[2], shadow_mem[3]);
+#if TSAN_DEBUG
+  if (!IsAppMem(addr)) {
+    Printf("Access to non app mem %lx\n", addr);
+    DCHECK(IsAppMem(addr));
+  }
+  if (!IsShadowMem((uptr)shadow_mem)) {
+    Printf("Bad shadow addr %p (%lx)\n", shadow_mem, addr);
+    DCHECK(IsShadowMem((uptr)shadow_mem));
+  }
+#endif
+
+  FastState fast_state = thr->fast_state;
+  if (fast_state.GetIgnoreBit())
+    return;
+  fast_state.IncrementEpoch();
+  thr->fast_state = fast_state;
+  Shadow cur(fast_state);
+  cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
+  cur.SetWrite(kAccessIsWrite);
+
+  // We must not store to the trace if we do not store to the shadow.
+  // That is, this call must be moved somewhere below.
+  TraceAddEvent(thr, fast_state.epoch(), EventTypeMop, pc);
+
+  MemoryAccessImpl(thr, addr, kAccessSizeLog, kAccessIsWrite, fast_state,
+      shadow_mem, cur);
+}
+
+static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                           u64 val) {
+  if (size == 0)
+    return;
+  // FIXME: fix me.
+  uptr offset = addr % kShadowCell;
+  if (offset) {
+    offset = kShadowCell - offset;
+    if (size <= offset)
+      return;
+    addr += offset;
+    size -= offset;
+  }
+  CHECK_EQ(addr % 8, 0);
+  CHECK(IsAppMem(addr));
+  CHECK(IsAppMem(addr + size - 1));
+  (void)thr;
+  (void)pc;
+  // Some programs mmap hundreds of GBs but actually use only a small part.
+  // So, it's better to report a false positive on the memory
+  // than to hang here senselessly.
+  const uptr kMaxResetSize = 1024*1024*1024;
+  if (size > kMaxResetSize)
+    size = kMaxResetSize;
+  size = (size + 7) & ~7;
+  u64 *p = (u64*)MemToShadow(addr);
+  CHECK(IsShadowMem((uptr)p));
+  CHECK(IsShadowMem((uptr)(p + size * kShadowCnt / kShadowCell - 1)));
+  // FIXME: may overwrite a part outside the region
+  for (uptr i = 0; i < size * kShadowCnt / kShadowCell; i++)
+    p[i] = val;
+}
+
+void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+  MemoryRangeSet(thr, pc, addr, size, 0);
+}
+
+void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+  MemoryAccessRange(thr, pc, addr, size, true);
+  MemoryRangeSet(thr, pc, addr, size, kShadowFreed);
+}
+
+void FuncEntry(ThreadState *thr, uptr pc) {
+  DCHECK_EQ(thr->in_rtl, 0);
+  StatInc(thr, StatFuncEnter);
+  DPrintf2("#%d: tsan::FuncEntry %p\n", (int)thr->fast_state.tid(), (void*)pc);
+  thr->fast_state.IncrementEpoch();
+  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeFuncEnter, pc);
+
+  // Shadow stack maintenance can be replaced with
+  // stack unwinding during trace switch (which presumably must be faster).
+  DCHECK(thr->shadow_stack_pos >= &thr->shadow_stack[0]);
+  DCHECK(thr->shadow_stack_pos < &thr->shadow_stack[kShadowStackSize]);
+  thr->shadow_stack_pos[0] = pc;
+  thr->shadow_stack_pos++;
+
+#if 1
+  // While we are testing on single-threaded benchmarks,
+  // emulate some synchronization activity.
+  // FIXME: remove me later.
+  if (((++thr->func_call_count) % 1000) == 0) {
+    thr->clock.set(thr->fast_state.tid(), thr->fast_state.epoch());
+    thr->fast_synch_epoch = thr->fast_state.epoch();
+  }
+#endif
+}
+
+void FuncExit(ThreadState *thr) {
+  DCHECK_EQ(thr->in_rtl, 0);
+  StatInc(thr, StatFuncExit);
+  DPrintf2("#%d: tsan::FuncExit\n", (int)thr->fast_state.tid());
+  thr->fast_state.IncrementEpoch();
+  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeFuncExit, 0);
+  // FuncEntry leaves pos one past the last slot when the stack is full.
+  DCHECK(thr->shadow_stack_pos > &thr->shadow_stack[0]);
+  DCHECK(thr->shadow_stack_pos <= &thr->shadow_stack[kShadowStackSize]);
+  thr->shadow_stack_pos--;
+}
+
+void IgnoreCtl(ThreadState *thr, bool write, bool begin) {
+  DPrintf("#%d: IgnoreCtl(%d, %d)\n", thr->tid, write, begin);
+  thr->ignore_reads_and_writes += begin ? 1 : -1;
+  CHECK_GE(thr->ignore_reads_and_writes, 0);
+  if (thr->ignore_reads_and_writes)
+    thr->fast_state.SetIgnoreBit();
+  else
+    thr->fast_state.ClearIgnoreBit();
+}
+
+void InternalAllocStatAggregate(Context *ctx, ThreadState *thr) {
+  for (int i = 0; i < (int)MBlockTypeCount; i++) {
+    ctx->int_alloc_cnt[i] += thr->int_alloc_cnt[i];
+    ctx->int_alloc_siz[i] += thr->int_alloc_siz[i];
+    thr->int_alloc_cnt[i] = 0;
+    thr->int_alloc_siz[i] = 0;
+  }
+}
+
+#if TSAN_DEBUG
+void build_consistency_debug() {}
+#else
+void build_consistency_release() {}
+#endif
+
+#if TSAN_COLLECT_STATS
+void build_consistency_stats() {}
+#else
+void build_consistency_nostats() {}
+#endif
+
+#if TSAN_SHADOW_COUNT == 1
+void build_consistency_shadow1() {}
+#elif TSAN_SHADOW_COUNT == 2
+void build_consistency_shadow2() {}
+#elif TSAN_SHADOW_COUNT == 4
+void build_consistency_shadow4() {}
+#else
+void build_consistency_shadow8() {}
+#endif
+
+}  // namespace __tsan
+
+// Must be included in this file to make sure everything is inlined.
+#include "tsan_interface_inl.h"
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
new file mode 100644
index 0000000..1117904
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -0,0 +1,459 @@
+//===-- tsan_rtl.h ----------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Main internal TSan header file.
+//
+// Ground rules:
+//   - C++ run-time should not be used (static CTORs, RTTI, exceptions, static
+//     function-scope locals)
+//   - All functions/classes/etc reside in namespace __tsan, except for those
+//     declared in tsan_interface.h.
+//   - Platform-specific files should be used instead of ifdefs (*).
+//   - No system headers included in header files (*).
+//   - Platform specific headers included only into platform-specific files (*).
+//
+//  (*) Except when inlining is critical for performance.
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_RTL_H
+#define TSAN_RTL_H
+
+#include "tsan_clock.h"
+#include "tsan_defs.h"
+#include "tsan_flags.h"
+#include "tsan_sync.h"
+#include "tsan_trace.h"
+#include "tsan_vector.h"
+#include "tsan_report.h"
+
+namespace __tsan {
+
+void Printf(const char *format, ...) FORMAT(1, 2);
+uptr Snprintf(char *buffer, uptr length, const char *format, ...)  FORMAT(3, 4);
+
+inline void NOINLINE breakhere() {
+  volatile int x = 42;
+  (void)x;
+}
+
+// FastState (from most significant bit):
+//   tid             : kTidBits
+//   epoch           : kClkBits
+//   unused          :
+//   ignore_bit      : 1
+class FastState {
+ public:
+  FastState(u64 tid, u64 epoch) {  // Packs both fields; ignore bit is clear.
+    x_ = tid << (64 - kTidBits);
+    x_ |= epoch << (64 - kTidBits - kClkBits);
+    CHECK(tid == this->tid());  // Fails if a value does not fit its field.
+    CHECK(epoch == this->epoch());
+  }
+
+  explicit FastState(u64 x)  // Wraps an already-encoded raw word.
+      : x_(x) {
+  }
+
+  u64 tid() const {  // The top kTidBits bits.
+    u64 res = x_ >> (64 - kTidBits);
+    return res;
+  }
+  u64 epoch() const {  // The kClkBits bits right below tid.
+    u64 res = (x_ << kTidBits) >> (64 - kClkBits);
+    return res;
+  }
+  void IncrementEpoch() {
+    // Adds 1 at the lowest bit of the epoch field. The literal must be 64-bit:
+    // an int 1 would overflow as soon as the shift count reaches 31.
+    x_ += (u64)1 << (64 - kTidBits - kClkBits);
+  }
+  void SetIgnoreBit() { x_ |= 1; }
+  void ClearIgnoreBit() { x_ &= ~(u64)1; }
+  bool GetIgnoreBit() const { return x_ & 1; }
+
+ private:
+  friend class Shadow;
+  u64 x_;
+};
+
+// Shadow (from most significant bit):
+//   tid             : kTidBits
+//   epoch           : kClkBits
+//   is_write        : 1
+//   size_log        : 2
+//   addr0           : 3
+class Shadow: public FastState {
+ public:
+  explicit Shadow(u64 x) : FastState(x) { }
+
+  explicit Shadow(const FastState &s) : FastState(s.x_) { }
+
+  void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
+    DCHECK_EQ(x_ & 31, 0);
+    DCHECK_LE(addr0, 7);
+    DCHECK_LE(kAccessSizeLog, 3);
+    x_ |= (kAccessSizeLog << 3) | addr0;
+    DCHECK_EQ(kAccessSizeLog, size_log());
+    DCHECK_EQ(addr0, this->addr0());
+  }
+
+  void SetWrite(unsigned kAccessIsWrite) {
+    DCHECK_EQ(x_ & 32, 0);
+    if (kAccessIsWrite)
+      x_ |= 32;
+    DCHECK_EQ(kAccessIsWrite, is_write());
+  }
+
+  bool IsZero() const { return x_ == 0; }
+  u64 raw() const { return x_; }
+
+  static inline bool TidsAreEqual(Shadow s1, Shadow s2) {
+    u64 shifted_xor = (s1.x_ ^ s2.x_) >> (64 - kTidBits);
+    DCHECK_EQ(shifted_xor == 0, s1.tid() == s2.tid());
+    return shifted_xor == 0;
+  }
+  static inline bool Addr0AndSizeAreEqual(Shadow s1, Shadow s2) {
+    u64 masked_xor = (s1.x_ ^ s2.x_) & 31;
+    return masked_xor == 0;
+  }
+
+  static bool TwoRangesIntersectSLOW(Shadow s1, Shadow s2) {
+    if (s1.addr0() == s2.addr0()) return true;
+    if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
+      return true;
+    if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0())
+      return true;
+    return false;
+  }
+
+  static inline bool TwoRangesIntersect(Shadow s1, Shadow s2,
+      unsigned kS2AccessSize) {
+    bool res = false;
+    u64 diff = s1.addr0() - s2.addr0();
+    if ((s64)diff < 0) {  // s1.addr0 < s2.addr0  // NOLINT
+      // if (s1.addr0() + size1) > s2.addr0()) return true;
+      if (s1.size() > -diff)  res = true;
+    } else {
+      // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
+      if (kS2AccessSize > diff) res = true;
+    }
+    DCHECK_EQ(res, TwoRangesIntersectSLOW(s1, s2));
+    DCHECK_EQ(res, TwoRangesIntersectSLOW(s2, s1));
+    return res;
+  }
+
+  // The idea behind the offset is as follows.
+  // Consider that we have 8 bool's contained within a single 8-byte block
+  // (mapped to a single shadow "cell"). Now consider that we write to the bools
+  // from a single thread (which we consider the common case).
+  // W/o offsetting each access will have to scan 4 shadow values at average
+  // to find the corresponding shadow value for the bool.
+  // With offsetting we start scanning shadow with the offset so that
+  // each access hits necessary shadow straight off (at least in an expected
+  // optimistic case).
+  // This logic works seamlessly for any layout of user data. For example,
+  // if user data is {int, short, char, char}, then accesses to the int are
+  // offsetted to 0, short - 4, 1st char - 6, 2nd char - 7. Hopefully, accesses
+  // from a single thread won't need to scan all 8 shadow values.
+  unsigned ComputeSearchOffset() {
+    return x_ & 7;
+  }
+  u64 addr0() const { return x_ & 7; }
+  u64 size() const { return 1ull << size_log(); }
+  bool is_write() const { return x_ & 32; }
+
+ private:
+  u64 size_log() const { return (x_ >> 3) & 3; }
+};
+
+// Freed memory.
+// As if 8-byte write by thread 0xff..f at epoch 0xff..f, races with everything.
+const u64 kShadowFreed = 0xfffffffffffffff8ull;
+
+const int kSigCount = 1024;
+const int kShadowStackSize = 1024;
+
+struct my_siginfo_t {
+  int opaque[128];
+};
+
+struct SignalDesc {
+  bool armed;
+  bool sigaction;
+  my_siginfo_t siginfo;
+};
+
+// This struct is stored in TLS.
+struct ThreadState {
+  FastState fast_state;
+  // Synch epoch represents the thread's epoch before the last synchronization
+  // action. It allows to reduce number of shadow state updates.
+  // For example, fast_synch_epoch=100, last write to addr X was at epoch=150,
+  // if we are processing write to X from the same thread at epoch=200,
+  // we do nothing, because both writes happen in the same 'synch epoch'.
+  // That is, if another memory access does not race with the former write,
+  // it does not race with the latter as well.
+  // QUESTION: can we squeeze this into ThreadState::Fast?
+  // E.g. ThreadState::Fast is a 44-bit, 32 are taken by synch_epoch and 12 are
+  // taken by epoch between synchs.
+  // This way we can save one load from tls.
+  u64 fast_synch_epoch;
+  // This is a slow path flag. On fast path, fast_state.GetIgnoreBit() is read.
+  // We do not distinguish between ignoring reads and writes
+  // for better performance.
+  int ignore_reads_and_writes;
+  uptr *shadow_stack_pos;
+  u64 *racy_shadow_addr;
+  u64 racy_state[2];
+  Trace trace;
+  uptr shadow_stack[kShadowStackSize];
+  ThreadClock clock;
+  u64 stat[StatCnt];
+  u64 int_alloc_cnt[MBlockTypeCount];
+  u64 int_alloc_siz[MBlockTypeCount];
+  const int tid;
+  int in_rtl;
+  int func_call_count;
+  const uptr stk_addr;
+  const uptr stk_size;
+  const uptr tls_addr;
+  const uptr tls_size;
+
+  DeadlockDetector deadlock_detector;
+
+  bool in_signal_handler;
+  int pending_signal_count;
+  SignalDesc pending_signals[kSigCount];
+
+  explicit ThreadState(Context *ctx, int tid, u64 epoch,
+                       uptr stk_addr, uptr stk_size,
+                       uptr tls_addr, uptr tls_size);
+};
+
+Context *CTX();
+extern THREADLOCAL char cur_thread_placeholder[];
+
+INLINE ThreadState *cur_thread() {
+  return reinterpret_cast<ThreadState *>(&cur_thread_placeholder);
+}
+
+enum ThreadStatus {
+  ThreadStatusInvalid,   // Non-existent thread, data is invalid.
+  ThreadStatusCreated,   // Created but not yet running.
+  ThreadStatusRunning,   // The thread is currently running.
+  ThreadStatusFinished,  // Joinable thread is finished but not yet joined.
+  ThreadStatusDead,      // Joined, but some info (trace) is still alive.
+};
+
+// Info about a thread that is held for some time after its termination.
+struct ThreadDeadInfo {
+  Trace trace;
+};
+
+struct ThreadContext {
+  const int tid;
+  int unique_id;  // Non-rolling thread id.
+  uptr user_id;  // Some opaque user thread id (e.g. pthread_t).
+  ThreadState *thr;
+  ThreadStatus status;
+  bool detached;
+  int reuse_count;
+  SyncClock sync;
+  // Epoch at which the thread had started.
+  // If we see an event from the thread stamped by an older epoch,
+  // the event is from a dead thread that shared tid with this thread.
+  u64 epoch0;
+  u64 epoch1;
+  StackTrace creation_stack;
+  ThreadDeadInfo dead_info;
+  ThreadContext* dead_next;  // In dead thread list.
+
+  explicit ThreadContext(int tid);
+};
+
+struct RacyStacks {
+  MD5Hash hash[2];
+  bool operator==(const RacyStacks &other) const {
+    if (hash[0] == other.hash[0] && hash[1] == other.hash[1])
+      return true;
+    if (hash[0] == other.hash[1] && hash[1] == other.hash[0])
+      return true;
+    return false;
+  }
+};
+
+struct RacyAddress {
+  uptr addr_min;
+  uptr addr_max;
+};
+
+struct Context {
+  Context();
+
+  bool initialized;
+
+  SyncTab synctab;
+
+  Mutex report_mtx;
+  int nreported;
+  int nmissed_expected;
+
+  Mutex thread_mtx;
+  int thread_seq;
+  int unique_thread_seq;
+  int alive_threads;
+  int max_alive_threads;
+  ThreadContext *threads[kMaxTid];
+  int dead_list_size;
+  ThreadContext* dead_list_head;
+  ThreadContext* dead_list_tail;
+
+  Vector<RacyStacks> racy_stacks;
+  Vector<RacyAddress> racy_addresses;
+
+  Flags flags;
+
+  u64 stat[StatCnt];
+  u64 int_alloc_cnt[MBlockTypeCount];
+  u64 int_alloc_siz[MBlockTypeCount];
+};
+
+class ScopedInRtl {
+ public:
+  ScopedInRtl();
+  ~ScopedInRtl();
+ private:
+  ThreadState*thr_;
+  int in_rtl_;
+  int errno_;
+};
+
+class ScopedReport {
+ public:
+  explicit ScopedReport(ReportType typ);
+  ~ScopedReport();
+
+  void AddStack(const StackTrace *stack);
+  void AddMemoryAccess(uptr addr, Shadow s, const StackTrace *stack);
+  void AddThread(const ThreadContext *tctx);
+  void AddMutex(const SyncVar *s);
+  void AddLocation(uptr addr, uptr size);
+
+  const ReportDesc *GetReport() const;
+
+ private:
+  Context *ctx_;
+  ReportDesc *rep_;
+
+  ScopedReport(const ScopedReport&);
+  void operator = (const ScopedReport&);
+};
+
+void InternalAllocStatAggregate(Context *ctx, ThreadState *thr);
+void StatAggregate(u64 *dst, u64 *src);
+void StatOutput(u64 *stat);
+void ALWAYS_INLINE INLINE StatInc(ThreadState *thr, StatType typ, u64 n = 1) {
+  if (kCollectStats)
+    thr->stat[typ] += n;
+}
+
+void InitializeShadowMemory();
+void InitializeInterceptors();
+void InitializeDynamicAnnotations();
+void Die() NORETURN;
+
+void ReportRace(ThreadState *thr);
+bool OutputReport(const ScopedReport &srep, ReportStack *suppress_stack = 0);
+bool IsExpectedReport(uptr addr, uptr size);
+
+#if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 1
+# define DPrintf Printf
+#else
+# define DPrintf(...)
+#endif
+
+#if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 2
+# define DPrintf2 Printf
+#else
+# define DPrintf2(...)
+#endif
+
+void Initialize(ThreadState *thr);
+int Finalize(ThreadState *thr);
+
+void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
+    int kAccessSizeLog, bool kAccessIsWrite);
+void MemoryAccessImpl(ThreadState *thr, uptr addr,
+    int kAccessSizeLog, bool kAccessIsWrite, FastState fast_state,
+    u64 *shadow_mem, Shadow cur);
+void MemoryRead1Byte(ThreadState *thr, uptr pc, uptr addr);
+void MemoryWrite1Byte(ThreadState *thr, uptr pc, uptr addr);
+void MemoryRead8Byte(ThreadState *thr, uptr pc, uptr addr);
+void MemoryWrite8Byte(ThreadState *thr, uptr pc, uptr addr);
+void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
+                       uptr size, bool is_write);
+void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size);
+void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size);
+void IgnoreCtl(ThreadState *thr, bool write, bool begin);
+
+void FuncEntry(ThreadState *thr, uptr pc);
+void FuncExit(ThreadState *thr);
+
+int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
+void ThreadStart(ThreadState *thr, int tid);
+void ThreadFinish(ThreadState *thr);
+int ThreadTid(ThreadState *thr, uptr pc, uptr uid);
+void ThreadJoin(ThreadState *thr, uptr pc, int tid);
+void ThreadDetach(ThreadState *thr, uptr pc, int tid);
+void ThreadFinalize(ThreadState *thr);
+
+void MutexCreate(ThreadState *thr, uptr pc, uptr addr, bool rw, bool recursive);
+void MutexDestroy(ThreadState *thr, uptr pc, uptr addr);
+void MutexLock(ThreadState *thr, uptr pc, uptr addr);
+void MutexUnlock(ThreadState *thr, uptr pc, uptr addr);
+void MutexReadLock(ThreadState *thr, uptr pc, uptr addr);
+void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr);
+void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr);
+
+void Acquire(ThreadState *thr, uptr pc, uptr addr);
+void Release(ThreadState *thr, uptr pc, uptr addr);
+
+// The hacky call uses custom calling convention and an assembly thunk.
+// It is considerably faster than a normal call for the caller
+// if it is not executed (it is intended for slow paths from hot functions).
+// The trick is that the call preserves all registers and the compiler
+// does not treat it as a call.
+// If it does not work for you, use normal call.
+#if TSAN_DEBUG == 0
+// The caller may not create the stack frame for itself at all,
+// so we create a reserve stack frame for it (1024b must be enough).
+#define HACKY_CALL(f) \
+  __asm__ __volatile__("sub $0x400, %%rsp;" \
+                       "call " #f "_thunk;" \
+                       "add $0x400, %%rsp;" ::: "memory");
+#else
+#define HACKY_CALL(f) f()
+#endif
+
+extern "C" void __tsan_trace_switch();
+void ALWAYS_INLINE INLINE TraceAddEvent(ThreadState *thr, u64 epoch,
+                                        EventType typ, uptr addr) {
+  StatInc(thr, StatEvents);
+  if (UNLIKELY((epoch % kTracePartSize) == 0))
+    HACKY_CALL(__tsan_trace_switch);
+  Event *evp = &thr->trace.events[epoch % kTraceSize];
+  Event ev = (u64)addr | ((u64)typ << 61);
+  *evp = ev;
+}
+
+}  // namespace __tsan
+
+#endif  // TSAN_RTL_H
diff --git a/lib/tsan/rtl/tsan_rtl_amd64.S b/lib/tsan/rtl/tsan_rtl_amd64.S
new file mode 100644
index 0000000..2028ec5
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl_amd64.S
@@ -0,0 +1,80 @@
+.section .text
+
+# Thunk behind HACKY_CALL(__tsan_trace_switch) in tsan_rtl.h: calls
+# __tsan_trace_switch while preserving the scratch registers saved below.
+.globl __tsan_trace_switch_thunk
+__tsan_trace_switch_thunk:
+  # Save scratch registers.
+  push %rax
+  push %rcx
+  push %rdx
+  push %rsi
+  push %rdi
+  push %r8
+  push %r9
+  push %r10
+  push %r11
+  # Align stack frame.
+  push %rbx  # non-scratch
+  mov %rsp, %rbx  # save current rsp
+  shr $4, %rsp  # clear 4 lsb, align to 16
+  shl $4, %rsp
+
+  call __tsan_trace_switch
+
+  # Unalign stack frame back.
+  mov %rbx, %rsp  # restore the original rsp
+  pop %rbx
+  # Restore scratch registers.
+  pop %r11
+  pop %r10
+  pop %r9
+  pop %r8
+  pop %rdi
+  pop %rsi
+  pop %rdx
+  pop %rcx
+  pop %rax
+  ret
+
+# Identical wrapper around __tsan_report_race: same register save/restore
+# and stack alignment sequence as the thunk above.
+.globl __tsan_report_race_thunk
+__tsan_report_race_thunk:
+  # Save scratch registers.
+  push %rax
+  push %rcx
+  push %rdx
+  push %rsi
+  push %rdi
+  push %r8
+  push %r9
+  push %r10
+  push %r11
+  # Align stack frame.
+  push %rbx  # non-scratch
+  mov %rsp, %rbx  # save current rsp
+  shr $4, %rsp  # clear 4 lsb, align to 16
+  shl $4, %rsp
+
+  call __tsan_report_race
+
+  # Unalign stack frame back.
+  mov %rbx, %rsp  # restore the original rsp
+  pop %rbx
+  # Restore scratch registers.
+  pop %r11
+  pop %r10
+  pop %r9
+  pop %r8
+  pop %rdi
+  pop %rsi
+  pop %rdx
+  pop %rcx
+  pop %rax
+  ret
+
+#if defined(__linux__)
+/* Mark the object as not needing an executable stack. */
+.section .note.GNU-stack,"",@progbits
+#endif
diff --git a/lib/tsan/rtl/tsan_rtl_mutex.cc b/lib/tsan/rtl/tsan_rtl_mutex.cc
new file mode 100644
index 0000000..f144209
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -0,0 +1,209 @@
+//===-- tsan_rtl_mutex.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_rtl.h"
+#include "tsan_sync.h"
+#include "tsan_report.h"
+#include "tsan_symbolize.h"
+
+namespace __tsan {
+
+void MutexCreate(ThreadState *thr, uptr pc, uptr addr,
+                 bool rw, bool recursive) {
+  Context *ctx = CTX();
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: MutexCreate %lx\n", thr->tid, addr);
+  StatInc(thr, StatMutexCreate);
+  MemoryWrite1Byte(thr, pc, addr);
+  SyncVar *s = ctx->synctab.GetAndLock(thr, pc, addr, true);
+  s->is_rw = rw;
+  s->is_recursive = recursive;
+  s->mtx.Unlock();
+}
+
+void MutexDestroy(ThreadState *thr, uptr pc, uptr addr) {
+  Context *ctx = CTX();
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: MutexDestroy %lx\n", thr->tid, addr);
+  StatInc(thr, StatMutexDestroy);
+  MemoryWrite1Byte(thr, pc, addr);
+  SyncVar *s = ctx->synctab.GetAndRemove(thr, pc, addr);
+  if (s == 0)
+    return;
+  if (s->owner_tid != SyncVar::kInvalidTid && !s->is_broken) {
+    s->is_broken = true;
+    ScopedReport rep(ReportTypeMutexDestroyLocked);
+    rep.AddMutex(s);
+    rep.AddLocation(s->addr, 1);
+    OutputReport(rep);
+  }
+  DestroyAndFree(s);
+}
+
+void MutexLock(ThreadState *thr, uptr pc, uptr addr) {
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: MutexLock %lx\n", thr->tid, addr);
+  MemoryRead1Byte(thr, pc, addr);
+  thr->fast_state.IncrementEpoch();
+  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeLock, addr);
+  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  if (s->owner_tid == SyncVar::kInvalidTid) {
+    CHECK_EQ(s->recursion, 0);
+    s->owner_tid = thr->tid;
+  } else if (s->owner_tid == thr->tid) {
+    CHECK_GT(s->recursion, 0);
+  } else {
+    Printf("ThreadSanitizer WARNING: double lock\n");
+  }
+  if (s->recursion == 0) {
+    StatInc(thr, StatMutexLock);
+    thr->clock.set(thr->tid, thr->fast_state.epoch());
+    thr->clock.acquire(&s->clock);
+    StatInc(thr, StatSyncAcquire);
+    thr->clock.acquire(&s->read_clock);
+    StatInc(thr, StatSyncAcquire);
+  } else if (!s->is_recursive) {
+    StatInc(thr, StatMutexRecLock);
+  }
+  s->recursion++;
+  s->mtx.Unlock();
+}
+
+void MutexUnlock(ThreadState *thr, uptr pc, uptr addr) {
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: MutexUnlock %lx\n", thr->tid, addr);
+  MemoryRead1Byte(thr, pc, addr);
+  thr->fast_state.IncrementEpoch();
+  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeUnlock, addr);
+  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  if (s->recursion == 0) {
+    if (!s->is_broken) {
+      s->is_broken = true;
+      Printf("ThreadSanitizer WARNING: unlock of unlocked mutex\n");
+    }
+  } else if (s->owner_tid != thr->tid) {
+    if (!s->is_broken) {
+      s->is_broken = true;
+      Printf("ThreadSanitizer WARNING: mutex unlock by another thread\n");
+    }
+  } else {
+    s->recursion--;
+    if (s->recursion == 0) {
+      StatInc(thr, StatMutexUnlock);
+      s->owner_tid = SyncVar::kInvalidTid;
+      thr->clock.set(thr->tid, thr->fast_state.epoch());
+      thr->fast_synch_epoch = thr->fast_state.epoch();
+      thr->clock.release(&s->clock);
+      StatInc(thr, StatSyncRelease);
+    } else {
+      StatInc(thr, StatMutexRecUnlock);
+    }
+  }
+  s->mtx.Unlock();
+}
+
+void MutexReadLock(ThreadState *thr, uptr pc, uptr addr) {
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: MutexReadLock %lx\n", thr->tid, addr);
+  StatInc(thr, StatMutexReadLock);
+  MemoryRead1Byte(thr, pc, addr);
+  thr->fast_state.IncrementEpoch();
+  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeRLock, addr);
+  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, false);
+  if (s->owner_tid != SyncVar::kInvalidTid)
+    Printf("ThreadSanitizer WARNING: read lock of a write locked mutex\n");
+  thr->clock.set(thr->tid, thr->fast_state.epoch());
+  thr->clock.acquire(&s->clock);
+  StatInc(thr, StatSyncAcquire);
+  s->mtx.ReadUnlock();
+}
+
+void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) {
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: MutexReadUnlock %lx\n", thr->tid, addr);
+  StatInc(thr, StatMutexReadUnlock);
+  MemoryRead1Byte(thr, pc, addr);
+  thr->fast_state.IncrementEpoch();
+  TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeRUnlock, addr);
+  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  if (s->owner_tid != SyncVar::kInvalidTid)
+    Printf("ThreadSanitizer WARNING: read unlock of a write locked mutex\n");
+  thr->clock.set(thr->tid, thr->fast_state.epoch());
+  thr->fast_synch_epoch = thr->fast_state.epoch();
+  thr->clock.release(&s->read_clock);
+  StatInc(thr, StatSyncRelease);
+  s->mtx.Unlock();
+}
+
+void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) {
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: MutexReadOrWriteUnlock %lx\n", thr->tid, addr);
+  MemoryRead1Byte(thr, pc, addr);
+  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  if (s->owner_tid == SyncVar::kInvalidTid) {
+    // Seems to be read unlock.
+    StatInc(thr, StatMutexReadUnlock);
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeRUnlock, addr);
+    thr->clock.set(thr->tid, thr->fast_state.epoch());
+    thr->fast_synch_epoch = thr->fast_state.epoch();
+    thr->clock.release(&s->read_clock);
+    StatInc(thr, StatSyncRelease);
+  } else if (s->owner_tid == thr->tid) {
+    // Seems to be write unlock.
+    CHECK_GT(s->recursion, 0);
+    s->recursion--;
+    if (s->recursion == 0) {
+      StatInc(thr, StatMutexUnlock);
+      s->owner_tid = SyncVar::kInvalidTid;
+      // FIXME: Refactor me, plz.
+      // The sequence of events is quite tricky and doubled in several places.
+      // First, it's a bug to increment the epoch w/o writing to the trace.
+      // Then, the acquire/release logic can be factored out as well.
+      thr->fast_state.IncrementEpoch();
+      TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeUnlock, addr);
+      thr->clock.set(thr->tid, thr->fast_state.epoch());
+      thr->fast_synch_epoch = thr->fast_state.epoch();
+      thr->clock.release(&s->clock);
+      StatInc(thr, StatSyncRelease);
+    } else {
+      StatInc(thr, StatMutexRecUnlock);
+    }
+  } else if (!s->is_broken) {
+    s->is_broken = true;
+    Printf("ThreadSanitizer WARNING: mutex unlock by another thread\n");
+  }
+  s->mtx.Unlock();
+}
+
+void Acquire(ThreadState *thr, uptr pc, uptr addr) {
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: Acquire %lx\n", thr->tid, addr);
+  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, false);
+  thr->clock.set(thr->tid, thr->fast_state.epoch());
+  thr->clock.acquire(&s->clock);
+  StatInc(thr, StatSyncAcquire);
+  s->mtx.ReadUnlock();
+}
+
+void Release(ThreadState *thr, uptr pc, uptr addr) {
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: Release %lx\n", thr->tid, addr);
+  SyncVar *s = CTX()->synctab.GetAndLock(thr, pc, addr, true);
+  thr->clock.set(thr->tid, thr->fast_state.epoch());
+  thr->fast_synch_epoch = thr->fast_state.epoch();  // As in MutexUnlock().
+  thr->clock.release(&s->clock);
+  StatInc(thr, StatSyncRelease);
+  s->mtx.Unlock();
+}
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_rtl_report.cc b/lib/tsan/rtl/tsan_rtl_report.cc
new file mode 100644
index 0000000..a5c5417
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl_report.cc
@@ -0,0 +1,354 @@
+//===-- tsan_rtl_report.cc --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+#include "tsan_suppressions.h"
+#include "tsan_symbolize.h"
+#include "tsan_report.h"
+#include "tsan_sync.h"
+#include "tsan_mman.h"
+#include "tsan_flags.h"
+#include "tsan_placement_new.h"
+
+namespace __tsan {
+
+// Can be overridden by an application/test to intercept reports.
+bool WEAK OnReport(const ReportDesc *rep, bool suppressed) {
+  (void)rep;
+  return suppressed;
+}
+
+static void StackStripMain(ReportStack *stack) {
+  ReportStack *last_frame = 0;
+  ReportStack *last_frame2 = 0;
+  const char *prefix = "interception_wrap_";
+  uptr prefix_len = internal_strlen(prefix);
+  const char *path_prefix = flags()->strip_path_prefix;
+  uptr path_prefix_len = internal_strlen(path_prefix);
+  for (ReportStack *ent = stack; ent; ent = ent->next) {
+    if (ent->func && 0 == internal_strncmp(ent->func, prefix, prefix_len))
+      ent->func += prefix_len;
+    if (ent->file && 0 == internal_strncmp(ent->file, path_prefix,
+                                           path_prefix_len))
+      ent->file += path_prefix_len;
+    if (ent->file && ent->file[0] == '.' && ent->file[1] == '/')
+      ent->file += 2;
+    last_frame2 = last_frame;
+    last_frame = ent;
+  }
+
+  if (last_frame2 == 0)
+    return;
+  const char *last = last_frame->func;
+  const char *last2 = last_frame2->func;
+  // Strip frame above 'main'
+  if (last2 && 0 == internal_strcmp(last2, "main")) {
+    last_frame2->next = 0;
+  // Strip our internal thread start routine.
+  } else if (last && 0 == internal_strcmp(last, "__tsan_thread_start_func")) {
+    last_frame2->next = 0;
+  // Strip global ctors init.
+  } else if (last && 0 == internal_strcmp(last, "__do_global_ctors_aux")) {
+    last_frame2->next = 0;
+  // If both are 0, then we probably just failed to symbolize.
+  } else if (last || last2) {
+    // Ensure that we recovered stack completely. Trimmed stack
+    // can actually happen if we do not instrument some code,
+    // so we only print a message. However we must try hard to not miss it
+    // due to our fault.
+    Printf("Bottom stack frame of stack %lx is missed\n", stack->pc);
+  }
+}
+
+static ReportStack *SymbolizeStack(const StackTrace& trace) {
+  if (trace.IsEmpty())
+    return 0;
+  ReportStack *stack = 0;
+  for (uptr si = 0; si < trace.Size(); si++) {
+    // We obtain the return address, that is, address of the next instruction,
+    // so offset it by 1 byte.
+    bool is_last = (si == trace.Size() - 1);
+    ReportStack *ent = SymbolizeCode(trace.Get(si) - !is_last);
+    CHECK_NE(ent, 0);
+    ReportStack *last = ent;
+    while (last->next) {
+      last->pc += !is_last;
+      last = last->next;
+    }
+    last->pc += !is_last;
+    last->next = stack;
+    stack = ent;
+  }
+  StackStripMain(stack);
+  return stack;
+}
+
+ScopedReport::ScopedReport(ReportType typ) {
+  ctx_ = CTX();
+  void *mem = internal_alloc(MBlockReport, sizeof(ReportDesc));
+  rep_ = new(mem) ReportDesc;
+  rep_->typ = typ;
+  ctx_->report_mtx.Lock();
+}
+
+ScopedReport::~ScopedReport() {
+  ctx_->report_mtx.Unlock();
+  rep_->~ReportDesc();
+  internal_free(rep_);
+}
+
+void ScopedReport::AddStack(const StackTrace *stack) {
+  ReportStack **rs = rep_->stacks.PushBack();
+  *rs = SymbolizeStack(*stack);
+}
+
+void ScopedReport::AddMemoryAccess(uptr addr, Shadow s,
+                                   const StackTrace *stack) {
+  void *mem = internal_alloc(MBlockReportMop, sizeof(ReportMop));
+  ReportMop *mop = new(mem) ReportMop;
+  rep_->mops.PushBack(mop);
+  mop->tid = s.tid();
+  mop->addr = addr + s.addr0();
+  mop->size = s.size();
+  mop->write = s.is_write();
+  mop->nmutex = 0;
+  mop->stack = SymbolizeStack(*stack);
+}
+
+void ScopedReport::AddThread(const ThreadContext *tctx) {
+  void *mem = internal_alloc(MBlockReportThread, sizeof(ReportThread));
+  ReportThread *rt = new(mem) ReportThread();
+  rep_->threads.PushBack(rt);
+  rt->id = tctx->tid;
+  rt->running = (tctx->status == ThreadStatusRunning);
+  rt->stack = SymbolizeStack(tctx->creation_stack);
+}
+
+void ScopedReport::AddMutex(const SyncVar *s) {
+  void *mem = internal_alloc(MBlockReportMutex, sizeof(ReportMutex));
+  ReportMutex *rm = new(mem) ReportMutex();
+  rep_->mutexes.PushBack(rm);
+  rm->id = 42;
+  rm->stack = SymbolizeStack(s->creation_stack);
+}
+
+void ScopedReport::AddLocation(uptr addr, uptr size) {
+  ReportStack *symb = SymbolizeData(addr);
+  if (symb) {
+    void *mem = internal_alloc(MBlockReportLoc, sizeof(ReportLocation));
+    ReportLocation *loc = new(mem) ReportLocation();
+    rep_->locs.PushBack(loc);
+    loc->type = ReportLocationGlobal;
+    loc->addr = addr;
+    loc->size = size;
+    loc->tid = 0;
+    loc->name = symb->func;
+    loc->file = symb->file;
+    loc->line = symb->line;
+    loc->stack = 0;
+    internal_free(symb);
+  }
+}
+
+const ReportDesc *ScopedReport::GetReport() const {
+  return rep_;
+}
+
+static void RestoreStack(int tid, const u64 epoch, StackTrace *stk) {
+  ThreadContext *tctx = CTX()->threads[tid];
+  if (tctx == 0)
+    return;
+  Trace* trace = 0;
+  if (tctx->status == ThreadStatusRunning) {
+    CHECK(tctx->thr);
+    trace = &tctx->thr->trace;
+  } else if (tctx->status == ThreadStatusFinished
+      || tctx->status == ThreadStatusDead) {
+    trace = &tctx->dead_info.trace;
+  } else {
+    return;
+  }
+  Lock l(&trace->mtx);
+  const int partidx = (epoch / (kTraceSize / kTraceParts)) % kTraceParts;
+  TraceHeader* hdr = &trace->headers[partidx];
+  if (epoch < hdr->epoch0)
+    return;
+  const u64 eend = epoch % kTraceSize;
+  const u64 ebegin = eend / kTracePartSize * kTracePartSize;
+  DPrintf("#%d: RestoreStack epoch=%llu ebegin=%llu eend=%llu partidx=%d\n",
+      tid, epoch, ebegin, eend, partidx);
+  InternalScopedBuf<uptr> stack(1024);  // FIXME: de-hardcode 1024
+  for (uptr i = 0; i < hdr->stack0.Size(); i++) {
+    stack[i] = hdr->stack0.Get(i);
+    DPrintf2("  #%02lu: pc=%lx\n", i, stack[i]);
+  }
+  uptr pos = hdr->stack0.Size();
+  for (uptr i = ebegin; i <= eend; i++) {
+    Event ev = trace->events[i];
+    EventType typ = (EventType)(ev >> 61);
+    uptr pc = (uptr)(ev & 0xffffffffffffull);
+    DPrintf2("  %lu typ=%d pc=%lx\n", i, typ, pc);
+    if (typ == EventTypeMop) {
+      stack[pos] = pc;
+    } else if (typ == EventTypeFuncEnter) {
+      stack[pos++] = pc;
+    } else if (typ == EventTypeFuncExit) {
+      // Since we have full stacks, this should never happen.
+      DCHECK_GT(pos, 0);
+      if (pos > 0)
+        pos--;
+    }
+    for (uptr j = 0; j <= pos; j++)
+      DPrintf2("      #%lu: %lx\n", j, stack[j]);
+  }
+  if (pos == 0 && stack[0] == 0)
+    return;
+  pos++;
+  stk->Init(stack, pos);
+}
+
+static bool HandleRacyStacks(ThreadState *thr, const StackTrace (&traces)[2],
+    uptr addr_min, uptr addr_max) {
+  Context *ctx = CTX();
+  bool equal_stack = false;
+  RacyStacks hash = {};
+  if (flags()->suppress_equal_stacks) {
+    hash.hash[0] = md5_hash(traces[0].Begin(), traces[0].Size() * sizeof(uptr));
+    hash.hash[1] = md5_hash(traces[1].Begin(), traces[1].Size() * sizeof(uptr));
+    for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
+      if (hash == ctx->racy_stacks[i]) {
+        DPrintf("ThreadSanitizer: suppressing report as doubled (stack)\n");
+        equal_stack = true;
+        break;
+      }
+    }
+  }
+  bool equal_address = false;
+  RacyAddress ra0 = {addr_min, addr_max};
+  if (flags()->suppress_equal_addresses) {
+    for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
+      RacyAddress ra2 = ctx->racy_addresses[i];
+      uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
+      uptr minend = min(ra0.addr_max, ra2.addr_max);
+      if (maxbeg < minend) {
+        DPrintf("ThreadSanitizer: suppressing report as doubled (addr)\n");
+        equal_address = true;
+        break;
+      }
+    }
+  }
+  if (equal_stack || equal_address) {
+    if (!equal_stack)
+      ctx->racy_stacks.PushBack(hash);
+    if (!equal_address)
+      ctx->racy_addresses.PushBack(ra0);
+    return true;
+  }
+  return false;
+}
+
+static void AddRacyStacks(ThreadState *thr, const StackTrace (&traces)[2],
+    uptr addr_min, uptr addr_max) {
+  Context *ctx = CTX();
+  if (flags()->suppress_equal_stacks) {
+    RacyStacks hash;
+    hash.hash[0] = md5_hash(traces[0].Begin(), traces[0].Size() * sizeof(uptr));
+    hash.hash[1] = md5_hash(traces[1].Begin(), traces[1].Size() * sizeof(uptr));
+    ctx->racy_stacks.PushBack(hash);
+  }
+  if (flags()->suppress_equal_addresses) {
+    RacyAddress ra0 = {addr_min, addr_max};
+    ctx->racy_addresses.PushBack(ra0);
+  }
+}
+
+bool OutputReport(const ScopedReport &srep, ReportStack *suppress_stack) {
+  const ReportDesc *rep = srep.GetReport();
+  bool suppressed = IsSuppressed(rep->typ, suppress_stack);
+  suppressed = OnReport(rep, suppressed);
+  if (suppressed)
+    return false;
+  PrintReport(rep);
+  CTX()->nreported++;
+  return true;
+}
+
+void ReportRace(ThreadState *thr) {
+  ScopedInRtl in_rtl;
+  uptr addr = ShadowToMem((uptr)thr->racy_shadow_addr);
+  uptr addr_min = 0;
+  uptr addr_max = 0;
+  {  // [addr_min, addr_max) covers both racy accesses.
+    uptr a0 = addr + Shadow(thr->racy_state[0]).addr0();
+    uptr a1 = addr + Shadow(thr->racy_state[1]).addr0();
+    uptr e0 = a0 + Shadow(thr->racy_state[0]).size();
+    uptr e1 = a1 + Shadow(thr->racy_state[1]).size();
+    addr_min = min(a0, a1);
+    addr_max = max(e0, e1);
+    if (IsExpectedReport(addr_min, addr_max - addr_min))
+      return;
+  }
+
+  Context *ctx = CTX();
+  Lock l0(&ctx->thread_mtx);
+
+  ScopedReport rep(ReportTypeRace);
+  const uptr nmop = thr->racy_state[1] == kShadowFreed ? 1 : 2;
+
+  StackTrace traces[2];
+  for (uptr i = 0; i < nmop; i++) {
+    Shadow s(thr->racy_state[i]);
+    RestoreStack(s.tid(), s.epoch(), &traces[i]);
+  }
+
+  if (HandleRacyStacks(thr, traces, addr_min, addr_max))
+    return;
+
+  for (uptr i = 0; i < nmop; i++) {
+    Shadow s(thr->racy_state[i]);
+    rep.AddMemoryAccess(addr, s, &traces[i]);
+  }
+
+  // Ensure that we have at least something for the current thread.
+  CHECK_EQ(traces[0].IsEmpty(), false);
+
+  for (uptr i = 0; i < nmop; i++) {
+    FastState s(thr->racy_state[i]);
+    ThreadContext *tctx = ctx->threads[s.tid()];  // May be an empty slot.
+    if (tctx == 0 || s.epoch() < tctx->epoch0 || s.epoch() > tctx->epoch1)
+      continue;
+    rep.AddThread(tctx);
+  }
+
+  if (!OutputReport(rep, rep.GetReport()->mops[0]->stack))
+    return;
+
+  AddRacyStacks(thr, traces, addr_min, addr_max);
+
+  // Bump the thread's clock a bit.
+  // This avoids series of similar reports between the same threads
+  // that happen close to each other (e.g. accessing several fields
+  // of the same object).
+  FastState s(thr->racy_state[1]);
+  thr->clock.set(s.tid(), s.epoch() + 100);
+}
+
+void CheckFailed(const char *file, int line, const char *cond, u64 v1, u64 v2) {
+  ScopedInRtl in_rtl;
+  Printf("FATAL: ThreadSanitizer CHECK failed: %s:%d \"%s\" (%llx, %llx)\n",
+         file, line, cond, v1, v2);
+  Die();
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
new file mode 100644
index 0000000..b874826
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -0,0 +1,368 @@
+//===-- tsan_rtl_thread.cc --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "tsan_placement_new.h"
+#include "tsan_platform.h"
+#include "tsan_report.h"
+#include "tsan_sync.h"
+
+namespace __tsan {
+
+const int kThreadQuarantineSize = 100;  // Max dead list size before reuse.
+
+static void MaybeReportThreadLeak(ThreadContext *tctx) {
+  // Reports tctx as leaked unless detached or not Created/Running/Finished.
+  const bool reportable = tctx->status == ThreadStatusCreated
+      || tctx->status == ThreadStatusRunning
+      || tctx->status == ThreadStatusFinished;
+  if (tctx->detached || !reportable)
+    return;
+  ScopedReport rep(ReportTypeThreadLeak);
+  rep.AddThread(tctx);
+  OutputReport(rep);
+}
+
+void ThreadFinalize(ThreadState *thr) {
+  // If leak reporting is on: report leaked threads and free all contexts.
+  CHECK_GT(thr->in_rtl, 0);
+  if (!flags()->report_thread_leaks)
+    return;
+  Context *ctx = CTX();
+  Lock guard(&ctx->thread_mtx);
+  for (int tid = 0; tid < kMaxTid; tid++) {
+    if (ThreadContext *tctx = ctx->threads[tid]) {
+      MaybeReportThreadLeak(tctx);
+      DestroyAndFree(tctx);
+      ctx->threads[tid] = 0;
+    }
+  }
+}
+
+static void ThreadDead(ThreadState *thr, ThreadContext *tctx) {
+  // Marks tctx dead, resets its sync state and appends it to the dead list.
+  Context *ctx = CTX();
+  CHECK_GT(thr->in_rtl, 0);
+  CHECK(tctx->status == ThreadStatusRunning
+      || tctx->status == ThreadStatusFinished);
+  DPrintf("#%d: ThreadDead uid=%lu\n", thr->tid, tctx->user_id);
+  tctx->status = ThreadStatusDead;
+  tctx->user_id = 0;
+  tctx->sync.Reset();
+
+  // Link at the tail; on an empty list tctx becomes the head as well.
+  tctx->dead_next = 0;
+  if (ctx->dead_list_size++ == 0)
+    ctx->dead_list_head = tctx;
+  else
+    ctx->dead_list_tail->dead_next = tctx;
+  ctx->dead_list_tail = tctx;
+}
+
+int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) {
+  CHECK_GT(thr->in_rtl, 0);
+  Context *ctx = CTX();
+  Lock l(&ctx->thread_mtx);
+  StatInc(thr, StatThreadCreate);
+  int tid = -1;
+  ThreadContext *tctx = 0;
+  if (ctx->dead_list_size > kThreadQuarantineSize
+      || ctx->thread_seq >= kMaxTid) {  // Reuse the oldest dead context.
+    if (ctx->dead_list_size == 0) {
+      Printf("ThreadSanitizer: %d thread limit exceeded. Dying.\n", kMaxTid);
+      Die();
+    }
+    StatInc(thr, StatThreadReuse);
+    tctx = ctx->dead_list_head;
+    ctx->dead_list_head = tctx->dead_next;
+    ctx->dead_list_size--;
+    if (ctx->dead_list_size == 0) {
+      CHECK_EQ(tctx->dead_next, 0);
+      ctx->dead_list_head = 0;
+    }
+    CHECK_EQ(tctx->status, ThreadStatusDead);
+    tctx->status = ThreadStatusInvalid;
+    tctx->reuse_count++;
+    tid = tctx->tid;
+    // The point to reclaim dead_info.
+    // delete tctx->dead_info;
+  } else {  // Otherwise allocate a context for the next unused tid.
+    StatInc(thr, StatThreadMaxTid);
+    tid = ctx->thread_seq++;
+    void *mem = internal_alloc(MBlockThreadContex, sizeof(ThreadContext));
+    tctx = new(mem) ThreadContext(tid);
+    ctx->threads[tid] = tctx;
+  }
+  CHECK_NE(tctx, 0);
+  CHECK_GE(tid, 0);
+  CHECK_LT(tid, kMaxTid);
+  DPrintf("#%d: ThreadCreate tid=%d uid=%lu\n", thr->tid, tid, uid);
+  CHECK_EQ(tctx->status, ThreadStatusInvalid);
+  ctx->alive_threads++;
+  if (ctx->max_alive_threads < ctx->alive_threads) {  // Track the high mark.
+    ctx->max_alive_threads++;
+    CHECK_EQ(ctx->max_alive_threads, ctx->alive_threads);
+    StatInc(thr, StatThreadMaxAlive);
+  }
+  tctx->status = ThreadStatusCreated;
+  tctx->thr = 0;
+  tctx->user_id = uid;
+  tctx->unique_id = ctx->unique_thread_seq++;
+  tctx->detached = detached;
+  if (tid) {  // Everything below is skipped for tid 0.
+    thr->fast_state.IncrementEpoch();
+    // Can't increment epoch w/o writing to the trace as well.
+    TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeMop, 0);
+    thr->clock.set(thr->tid, thr->fast_state.epoch());
+    thr->fast_synch_epoch = thr->fast_state.epoch();
+    thr->clock.release(&tctx->sync);  // Acquired by the child in ThreadStart().
+    StatInc(thr, StatSyncRelease);
+    // Remember the creator's current stack (at pc) as the creation stack.
+    tctx->creation_stack.ObtainCurrent(thr, pc);
+  }
+  return tid;  // The tid assigned to (or reused for) the new thread.
+}
+
+void ThreadStart(ThreadState *thr, int tid) {
+  CHECK_GT(thr->in_rtl, 0);
+  uptr stk_addr = 0;
+  uptr stk_size = 0;
+  uptr tls_addr = 0;
+  uptr tls_size = 0;
+  GetThreadStackAndTls(&stk_addr, &stk_size, &tls_addr, &tls_size);
+  // Reset the memory range that holds this thread's stack.
+  MemoryResetRange(thr, /*pc=*/ 1, stk_addr, stk_size);
+
+  // Check that the thr object lies entirely inside the TLS block.
+  const uptr thr_beg = (uptr)thr;
+  const uptr thr_end = (uptr)thr + sizeof(*thr);
+  CHECK_GE(thr_beg, tls_addr);
+  CHECK_LE(thr_beg, tls_addr + tls_size);
+  CHECK_GE(thr_end, tls_addr);
+  CHECK_LE(thr_end, tls_addr + tls_size);
+  // Since the thr object is huge, skip it.
+  MemoryResetRange(thr, /*pc=*/ 2, tls_addr, thr_beg - tls_addr);
+  MemoryResetRange(thr, /*pc=*/ 2, thr_end, tls_addr + tls_size - thr_end);
+  // Under the thread mutex: Created -> Running, then rebuild thr in place.
+  Lock l(&CTX()->thread_mtx);
+  ThreadContext *tctx = CTX()->threads[tid];
+  CHECK_NE(tctx, 0);
+  CHECK_EQ(tctx->status, ThreadStatusCreated);
+  tctx->status = ThreadStatusRunning;
+  tctx->epoch0 = tctx->epoch1 + 1;  // One past the previously recorded end.
+  tctx->epoch1 = (u64)-1;  // Open-ended until ThreadFinish() records the end.
+  new(thr) ThreadState(CTX(), tid, tctx->epoch0, stk_addr, stk_size,
+                       tls_addr, tls_size);
+  tctx->thr = thr;
+  thr->fast_synch_epoch = tctx->epoch0;
+  thr->clock.set(tid, tctx->epoch0);
+  thr->clock.acquire(&tctx->sync);  // Pairs with release in ThreadCreate().
+  StatInc(thr, StatSyncAcquire);
+  DPrintf("#%d: ThreadStart epoch=%llu stk_addr=%lx stk_size=%lx "
+      "tls_addr=%lx tls_size=%lx\n",
+      tid, tctx->epoch0, stk_addr, stk_size, tls_addr, tls_size);
+}
+
+void ThreadFinish(ThreadState *thr) {
+  CHECK_GT(thr->in_rtl, 0);
+  StatInc(thr, StatThreadFinish);
+  // FIXME: Treat it as write.
+  if (thr->stk_addr && thr->stk_size)
+    MemoryResetRange(thr, /*pc=*/ 3, thr->stk_addr, thr->stk_size);
+  if (thr->tls_addr && thr->tls_size) {
+    const uptr thr_beg = (uptr)thr;
+    const uptr thr_end = (uptr)thr + sizeof(*thr);
+    // Since the thr object is huge, skip it.
+    MemoryResetRange(thr, /*pc=*/ 4, thr->tls_addr, thr_beg - thr->tls_addr);
+    MemoryResetRange(thr, /*pc=*/ 5,
+        thr_end, thr->tls_addr + thr->tls_size - thr_end);
+  }
+  Context *ctx = CTX();
+  Lock l(&ctx->thread_mtx);
+  ThreadContext *tctx = ctx->threads[thr->tid];
+  CHECK_NE(tctx, 0);
+  CHECK_EQ(tctx->status, ThreadStatusRunning);
+  CHECK_GT(ctx->alive_threads, 0);
+  ctx->alive_threads--;
+  if (tctx->detached) {  // Detached: retire the context right away.
+    ThreadDead(thr, tctx);
+  } else {  // Joinable: release the final clock for ThreadJoin() to acquire.
+    thr->fast_state.IncrementEpoch();
+    // Can't increment epoch w/o writing to the trace as well.
+    TraceAddEvent(thr, thr->fast_state.epoch(), EventTypeMop, 0);
+    thr->clock.set(thr->tid, thr->fast_state.epoch());
+    thr->fast_synch_epoch = thr->fast_state.epoch();
+    thr->clock.release(&tctx->sync);
+    StatInc(thr, StatSyncRelease);
+    tctx->status = ThreadStatusFinished;
+  }
+
+  // Save some info about the thread.
+  // If dead_info will become dynamically allocated again,
+  // it is the point to allocate it.
+  // tctx->dead_info = new ThreadDeadInfo;
+  internal_memcpy(&tctx->dead_info.trace.events[0],
+      &thr->trace.events[0], sizeof(thr->trace.events));
+  for (int i = 0; i < kTraceParts; i++) {
+    tctx->dead_info.trace.headers[i].stack0.CopyFrom(
+        thr->trace.headers[i].stack0);
+  }
+  tctx->epoch1 = thr->clock.get(tctx->tid);  // End of this run's epoch range.
+
+  thr->~ThreadState();  // NOTE(review): thr->stat is still read below.
+  StatAggregate(ctx->stat, thr->stat);
+  InternalAllocStatAggregate(ctx, thr);
+  tctx->thr = 0;
+}
+
+int ThreadTid(ThreadState *thr, uptr pc, uptr uid) {
+  // Returns the lowest tid whose non-invalid context has user id uid, or -1.
+  CHECK_GT(thr->in_rtl, 0);
+  DPrintf("#%d: ThreadTid uid=%lu\n", thr->tid, uid);
+  Lock guard(&CTX()->thread_mtx);
+  for (int tid = 0; tid < kMaxTid; tid++) {
+    const ThreadContext *tctx = CTX()->threads[tid];
+    if (tctx && tctx->user_id == uid && tctx->status != ThreadStatusInvalid)
+      return tid;
+  }
+  return -1;
+}
+
+void ThreadJoin(ThreadState *thr, uptr pc, int tid) {
+  // Joins thread tid: acquires its final sync state and retires its context.
+  CHECK_GT(thr->in_rtl, 0);
+  CHECK_GT(tid, 0);
+  CHECK_LT(tid, kMaxTid);
+  DPrintf("#%d: ThreadJoin tid=%d\n", thr->tid, tid);
+  Lock l(&CTX()->thread_mtx);
+  ThreadContext *tctx = CTX()->threads[tid];
+  if (tctx == 0 || tctx->status == ThreadStatusInvalid) {  // Null: never used.
+    Printf("ThreadSanitizer: join of non-existent thread\n");
+    return;
+  }
+  CHECK_EQ(tctx->detached, false);
+  CHECK_EQ(tctx->status, ThreadStatusFinished);
+  thr->clock.acquire(&tctx->sync);
+  StatInc(thr, StatSyncAcquire);
+  ThreadDead(thr, tctx);
+}
+
+void ThreadDetach(ThreadState *thr, uptr pc, int tid) {
+  // Detaches thread tid; an already finished thread is retired immediately.
+  CHECK_GT(thr->in_rtl, 0);
+  CHECK_GT(tid, 0);
+  CHECK_LT(tid, kMaxTid);
+  Lock l(&CTX()->thread_mtx);
+  ThreadContext *tctx = CTX()->threads[tid];
+  if (tctx == 0 || tctx->status == ThreadStatusInvalid) {  // Null: never used.
+    Printf("ThreadSanitizer: detach of non-existent thread\n");
+    return;
+  }
+  if (tctx->status == ThreadStatusFinished) {
+    ThreadDead(thr, tctx);
+  } else {
+    tctx->detached = true;
+  }
+}
+
+void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
+                       uptr size, bool is_write) {
+  if (size == 0)
+    return;
+  // Split the range: byte accesses at ragged edges, size-log-3 ones between.
+  u64 *shadow_mem = (u64*)MemToShadow(addr);
+  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n",
+      thr->tid, (void*)pc, (void*)addr,
+      (int)size, is_write);
+  // Debug-only sanity checks: both ends must be app memory with valid shadow.
+#if TSAN_DEBUG
+  if (!IsAppMem(addr)) {
+    Printf("Access to non app mem %lx\n", addr);
+    DCHECK(IsAppMem(addr));
+  }
+  if (!IsAppMem(addr + size - 1)) {
+    Printf("Access to non app mem %lx\n", addr + size - 1);
+    DCHECK(IsAppMem(addr + size - 1));
+  }
+  if (!IsShadowMem((uptr)shadow_mem)) {
+    Printf("Bad shadow addr %p (%lx)\n", shadow_mem, addr);
+    DCHECK(IsShadowMem((uptr)shadow_mem));
+  }
+  if (!IsShadowMem((uptr)(shadow_mem + size * kShadowCnt / 8 - 1))) {
+    Printf("Bad shadow addr %p (%lx)\n",
+        shadow_mem + size * kShadowCnt / 8 - 1, addr + size - 1);
+    DCHECK(IsShadowMem((uptr)(shadow_mem + size * kShadowCnt / 8 - 1)));
+  }
+#endif
+
+  StatInc(thr, StatMopRange);
+  // Nothing is recorded while the thread's ignore bit is set.
+  FastState fast_state = thr->fast_state;
+  if (fast_state.GetIgnoreBit())
+    return;
+  // One epoch increment and one trace event cover the whole range.
+  fast_state.IncrementEpoch();
+  thr->fast_state = fast_state;
+  TraceAddEvent(thr, fast_state.epoch(), EventTypeMop, pc);
+  // Remember whether the range starts in the middle of a shadow cell.
+  bool unaligned = (addr % kShadowCell) != 0;
+
+  // Handle unaligned beginning, if any.
+  for (; addr % kShadowCell && size; addr++, size--) {
+    int const kAccessSizeLog = 0;
+    Shadow cur(fast_state);
+    cur.SetWrite(is_write);
+    cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, fast_state,
+        shadow_mem, cur);
+  }
+  if (unaligned)  // The first cell is done; step to the next cell's shadow.
+    shadow_mem += kShadowCnt;
+  // Handle middle part, if any.
+  for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) {
+    int const kAccessSizeLog = 3;
+    Shadow cur(fast_state);
+    cur.SetWrite(is_write);
+    cur.SetAddr0AndSizeLog(0, kAccessSizeLog);
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, fast_state,
+        shadow_mem, cur);
+    shadow_mem += kShadowCnt;
+  }
+  // Handle ending, if any.
+  for (; size; addr++, size--) {
+    int const kAccessSizeLog = 0;
+    Shadow cur(fast_state);
+    cur.SetWrite(is_write);
+    cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, fast_state,
+        shadow_mem, cur);
+  }
+}
+
+void MemoryRead1Byte(ThreadState *thr, uptr pc, uptr addr) {
+  MemoryAccess(thr, pc, addr, 0, 0);  // Presumably size log 0, is_write 0.
+}
+
+void MemoryWrite1Byte(ThreadState *thr, uptr pc, uptr addr) {
+  MemoryAccess(thr, pc, addr, 0, 1);  // Presumably size log 0, is_write 1.
+}
+
+void MemoryRead8Byte(ThreadState *thr, uptr pc, uptr addr) {
+  MemoryAccess(thr, pc, addr, 3, 0);  // Presumably size log 3, is_write 0.
+}
+
+void MemoryWrite8Byte(ThreadState *thr, uptr pc, uptr addr) {
+  MemoryAccess(thr, pc, addr, 3, 1);  // Presumably size log 3, is_write 1.
+}
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_stat.cc b/lib/tsan/rtl/tsan_stat.cc
new file mode 100644
index 0000000..21e1ee7
--- /dev/null
+++ b/lib/tsan/rtl/tsan_stat.cc
@@ -0,0 +1,247 @@
+//===-- tsan_stat.cc --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_stat.h"
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+void StatAggregate(u64 *dst, u64 *src) {
+  // Adds each of the StatCnt counters in src to the matching slot of dst.
+  // Nothing is touched when kCollectStats is false.
+  for (int idx = 0; kCollectStats && idx < StatCnt; ++idx)
+    dst[idx] += src[idx];
+}
+
+void StatOutput(u64 *stat) {
+  if (!kCollectStats)
+    return;
+  // StatShadowNonZero is derived here: processed cells minus empty cells.
+  stat[StatShadowNonZero] = stat[StatShadowProcessed] - stat[StatShadowZero];
+  // Label for each StatType; the table is (re)filled on every call.
+  static const char *name[StatCnt] = {};
+  name[StatMop]                          = "Memory accesses                   ";
+  name[StatMopRead]                      = "  Including reads                 ";
+  name[StatMopWrite]                     = "            writes                ";
+  name[StatMop1]                         = "  Including size 1                ";
+  name[StatMop2]                         = "            size 2                ";
+  name[StatMop4]                         = "            size 4                ";
+  name[StatMop8]                         = "            size 8                ";
+  name[StatMopSame]                      = "  Including same                  ";
+  name[StatMopRange]                     = "  Including range                 ";
+  name[StatShadowProcessed]              = "Shadow processed                  ";
+  name[StatShadowZero]                   = "  Including empty                 ";
+  name[StatShadowNonZero]                = "  Including non empty             ";
+  name[StatShadowSameSize]               = "  Including same size             ";
+  name[StatShadowIntersect]              = "            intersect             ";
+  name[StatShadowNotIntersect]           = "            not intersect         ";
+  name[StatShadowSameThread]             = "  Including same thread           ";
+  name[StatShadowAnotherThread]          = "            another thread        ";
+  name[StatShadowReplace]                = "  Including evicted               ";
+
+  name[StatFuncEnter]                    = "Function entries                  ";
+  name[StatFuncExit]                     = "Function exits                    ";
+  name[StatEvents]                       = "Events collected                  ";
+
+  name[StatThreadCreate]                 = "Total threads created             ";
+  name[StatThreadFinish]                 = "  threads finished                ";
+  name[StatThreadReuse]                  = "  threads reused                  ";
+  name[StatThreadMaxTid]                 = "  max tid                         ";
+  name[StatThreadMaxAlive]               = "  max alive threads               ";
+
+  name[StatMutexCreate]                  = "Mutexes created                   ";
+  name[StatMutexDestroy]                 = "  destroyed                       ";
+  name[StatMutexLock]                    = "  lock                            ";
+  name[StatMutexUnlock]                  = "  unlock                          ";
+  name[StatMutexRecLock]                 = "  recursive lock                  ";
+  name[StatMutexRecUnlock]               = "  recursive unlock                ";
+  name[StatMutexReadLock]                = "  read lock                       ";
+  name[StatMutexReadUnlock]              = "  read unlock                     ";
+
+  name[StatSyncCreated]                  = "Sync objects created              ";
+  name[StatSyncDestroyed]                = "             destroyed            ";
+  name[StatSyncAcquire]                  = "             acquired             ";
+  name[StatSyncRelease]                  = "             released             ";
+
+  name[StatAtomic]                       = "Atomic operations                 ";
+  name[StatAtomicLoad]                   = "  Including load                  ";
+  name[StatAtomicStore]                  = "            store                 ";
+  name[StatAtomicExchange]               = "            exchange              ";
+  name[StatAtomicFetchAdd]               = "            fetch_add             ";
+  name[StatAtomicCAS]                    = "            compare_exchange      ";
+  name[StatAtomicFence]                  = "            fence                 ";
+  name[StatAtomicRelaxed]                = "  Including relaxed               ";
+  name[StatAtomicConsume]                = "            consume               ";
+  name[StatAtomicAcquire]                = "            acquire               ";
+  name[StatAtomicRelease]                = "            release               ";
+  name[StatAtomicAcq_Rel]                = "            acq_rel               ";
+  name[StatAtomicSeq_Cst]                = "            seq_cst               ";
+  name[StatAtomic1]                      = "  Including size 1                ";
+  name[StatAtomic2]                      = "            size 2                ";
+  name[StatAtomic4]                      = "            size 4                ";
+  name[StatAtomic8]                      = "            size 8                ";
+
+  name[StatInterceptor]                  = "Interceptors                      ";
+  name[StatInt_malloc]                   = "  malloc                          ";
+  name[StatInt_calloc]                   = "  calloc                          ";
+  name[StatInt_realloc]                  = "  realloc                         ";
+  name[StatInt_free]                     = "  free                            ";
+  name[StatInt_cfree]                    = "  cfree                           ";
+  name[StatInt_mmap]                     = "  mmap                            ";
+  name[StatInt_mmap64]                   = "  mmap64                          ";
+  name[StatInt_munmap]                   = "  munmap                          ";
+  name[StatInt_memalign]                 = "  memalign                        ";
+  name[StatInt_valloc]                   = "  valloc                          ";
+  name[StatInt_pvalloc]                  = "  pvalloc                         ";
+  name[StatInt_posix_memalign]           = "  posix_memalign                  ";
+  name[StatInt__Znwm]                    = "  _Znwm                           ";
+  name[StatInt__ZnwmRKSt9nothrow_t]      = "  _ZnwmRKSt9nothrow_t             ";
+  name[StatInt__Znam]                    = "  _Znam                           ";
+  name[StatInt__ZnamRKSt9nothrow_t]      = "  _ZnamRKSt9nothrow_t             ";
+  name[StatInt__ZdlPv]                   = "  _ZdlPv                          ";
+  name[StatInt__ZdlPvRKSt9nothrow_t]     = "  _ZdlPvRKSt9nothrow_t            ";
+  name[StatInt__ZdaPv]                   = "  _ZdaPv                          ";
+  name[StatInt__ZdaPvRKSt9nothrow_t]     = "  _ZdaPvRKSt9nothrow_t            ";
+  name[StatInt_strlen]                   = "  strlen                          ";
+  name[StatInt_memset]                   = "  memset                          ";
+  name[StatInt_memcpy]                   = "  memcpy                          ";
+  name[StatInt_strcmp]                   = "  strcmp                          ";
+  name[StatInt_memchr]                   = "  memchr                          ";
+  name[StatInt_memrchr]                  = "  memrchr                         ";
+  name[StatInt_memmove]                  = "  memmove                         ";
+  name[StatInt_memcmp]                   = "  memcmp                          ";
+  name[StatInt_strchr]                   = "  strchr                          ";
+  name[StatInt_strchrnul]                = "  strchrnul                       ";
+  name[StatInt_strrchr]                  = "  strrchr                         ";
+  name[StatInt_strncmp]                  = "  strncmp                         ";
+  name[StatInt_strcpy]                   = "  strcpy                          ";
+  name[StatInt_strncpy]                  = "  strncpy                         ";
+  name[StatInt_strstr]                   = "  strstr                          ";
+  name[StatInt_atexit]                   = "  atexit                          ";
+  name[StatInt___cxa_guard_acquire]      = "  __cxa_guard_acquire             ";
+  name[StatInt___cxa_guard_release]      = "  __cxa_guard_release             ";
+  name[StatInt_pthread_create]           = "  pthread_create                  ";
+  name[StatInt_pthread_join]             = "  pthread_join                    ";
+  name[StatInt_pthread_detach]           = "  pthread_detach                  ";
+  name[StatInt_pthread_mutex_init]       = "  pthread_mutex_init              ";
+  name[StatInt_pthread_mutex_destroy]    = "  pthread_mutex_destroy           ";
+  name[StatInt_pthread_mutex_lock]       = "  pthread_mutex_lock              ";
+  name[StatInt_pthread_mutex_trylock]    = "  pthread_mutex_trylock           ";
+  name[StatInt_pthread_mutex_timedlock]  = "  pthread_mutex_timedlock         ";
+  name[StatInt_pthread_mutex_unlock]     = "  pthread_mutex_unlock            ";
+  name[StatInt_pthread_spin_init]        = "  pthread_spin_init               ";
+  name[StatInt_pthread_spin_destroy]     = "  pthread_spin_destroy            ";
+  name[StatInt_pthread_spin_lock]        = "  pthread_spin_lock               ";
+  name[StatInt_pthread_spin_trylock]     = "  pthread_spin_trylock            ";
+  name[StatInt_pthread_spin_unlock]      = "  pthread_spin_unlock             ";
+  name[StatInt_pthread_rwlock_init]      = "  pthread_rwlock_init             ";
+  name[StatInt_pthread_rwlock_destroy]   = "  pthread_rwlock_destroy          ";
+  name[StatInt_pthread_rwlock_rdlock]    = "  pthread_rwlock_rdlock           ";
+  name[StatInt_pthread_rwlock_tryrdlock] = "  pthread_rwlock_tryrdlock        ";
+  name[StatInt_pthread_rwlock_timedrdlock]
+                                         = "  pthread_rwlock_timedrdlock      ";
+  name[StatInt_pthread_rwlock_wrlock]    = "  pthread_rwlock_wrlock           ";
+  name[StatInt_pthread_rwlock_trywrlock] = "  pthread_rwlock_trywrlock        ";
+  name[StatInt_pthread_rwlock_timedwrlock]
+                                         = "  pthread_rwlock_timedwrlock      ";
+  name[StatInt_pthread_rwlock_unlock]    = "  pthread_rwlock_unlock           ";
+  name[StatInt_pthread_cond_init]        = "  pthread_cond_init               ";
+  name[StatInt_pthread_cond_destroy]     = "  pthread_cond_destroy            ";
+  name[StatInt_pthread_cond_signal]      = "  pthread_cond_signal             ";
+  name[StatInt_pthread_cond_broadcast]   = "  pthread_cond_broadcast          ";
+  name[StatInt_pthread_cond_wait]        = "  pthread_cond_wait               ";
+  name[StatInt_pthread_cond_timedwait]   = "  pthread_cond_timedwait          ";
+  name[StatInt_pthread_barrier_init]     = "  pthread_barrier_init            ";
+  name[StatInt_pthread_barrier_destroy]  = "  pthread_barrier_destroy         ";
+  name[StatInt_pthread_barrier_wait]     = "  pthread_barrier_wait            ";
+  name[StatInt_pthread_once]             = "  pthread_once                    ";
+  name[StatInt_sem_init]                 = "  sem_init                        ";
+  name[StatInt_sem_destroy]              = "  sem_destroy                     ";
+  name[StatInt_sem_wait]                 = "  sem_wait                        ";
+  name[StatInt_sem_trywait]              = "  sem_trywait                     ";
+  name[StatInt_sem_timedwait]            = "  sem_timedwait                   ";
+  name[StatInt_sem_post]                 = "  sem_post                        ";
+  name[StatInt_sem_getvalue]             = "  sem_getvalue                    ";
+  name[StatInt_read]                     = "  read                            ";
+  name[StatInt_pread]                    = "  pread                           ";
+  name[StatInt_pread64]                  = "  pread64                         ";
+  name[StatInt_readv]                    = "  readv                           ";
+  name[StatInt_preadv64]                 = "  preadv64                        ";
+  name[StatInt_write]                    = "  write                           ";
+  name[StatInt_pwrite]                   = "  pwrite                          ";
+  name[StatInt_pwrite64]                 = "  pwrite64                        ";
+  name[StatInt_writev]                   = "  writev                          ";
+  name[StatInt_pwritev64]                = "  pwritev64                       ";
+  name[StatInt_send]                     = "  send                            ";
+  name[StatInt_sendmsg]                  = "  sendmsg                         ";
+  name[StatInt_recv]                     = "  recv                            ";
+  name[StatInt_recvmsg]                  = "  recvmsg                         ";
+  name[StatInt_unlink]                   = "  unlink                          ";
+  name[StatInt_fopen]                    = "  fopen                           ";
+  name[StatInt_fread]                    = "  fread                           ";
+  name[StatInt_fwrite]                   = "  fwrite                          ";
+  name[StatInt_puts]                     = "  puts                            ";
+  name[StatInt_rmdir]                    = "  rmdir                           ";
+  name[StatInt_opendir]                  = "  opendir                         ";
+  name[StatInt_epoll_ctl]                = "  epoll_ctl                       ";
+  name[StatInt_epoll_wait]               = "  epoll_wait                      ";
+  name[StatInt_sigaction]                = "  sigaction                       ";
+
+  name[StatAnnotation]                   = "Dynamic annotations               ";
+  name[StatAnnotateHappensBefore]        = "  HappensBefore                   ";
+  name[StatAnnotateHappensAfter]         = "  HappensAfter                    ";
+  name[StatAnnotateCondVarSignal]        = "  CondVarSignal                   ";
+  name[StatAnnotateCondVarSignalAll]     = "  CondVarSignalAll                ";
+  name[StatAnnotateMutexIsNotPHB]        = "  MutexIsNotPHB                   ";
+  name[StatAnnotateCondVarWait]          = "  CondVarWait                     ";
+  name[StatAnnotateRWLockCreate]         = "  RWLockCreate                    ";
+  name[StatAnnotateRWLockDestroy]        = "  RWLockDestroy                   ";
+  name[StatAnnotateRWLockAcquired]       = "  RWLockAcquired                  ";
+  name[StatAnnotateRWLockReleased]       = "  RWLockReleased                  ";
+  name[StatAnnotateTraceMemory]          = "  TraceMemory                     ";
+  name[StatAnnotateFlushState]           = "  FlushState                      ";
+  name[StatAnnotateNewMemory]            = "  NewMemory                       ";
+  name[StatAnnotateNoOp]                 = "  NoOp                            ";
+  name[StatAnnotateFlushExpectedRaces]   = "  FlushExpectedRaces              ";
+  name[StatAnnotateEnableRaceDetection]  = "  EnableRaceDetection             ";
+  name[StatAnnotateMutexIsUsedAsCondVar] = "  MutexIsUsedAsCondVar            ";
+  name[StatAnnotatePCQGet]               = "  PCQGet                          ";
+  name[StatAnnotatePCQPut]               = "  PCQPut                          ";
+  name[StatAnnotatePCQDestroy]           = "  PCQDestroy                      ";
+  name[StatAnnotatePCQCreate]            = "  PCQCreate                       ";
+  name[StatAnnotateExpectRace]           = "  ExpectRace                      ";
+  name[StatAnnotateBenignRaceSized]      = "  BenignRaceSized                 ";
+  name[StatAnnotateBenignRace]           = "  BenignRace                      ";
+  name[StatAnnotateIgnoreReadsBegin]     = "  IgnoreReadsBegin                ";
+  name[StatAnnotateIgnoreReadsEnd]       = "  IgnoreReadsEnd                  ";
+  name[StatAnnotateIgnoreWritesBegin]    = "  IgnoreWritesBegin               ";
+  name[StatAnnotateIgnoreWritesEnd]      = "  IgnoreWritesEnd                 ";
+  name[StatAnnotatePublishMemoryRange]   = "  PublishMemoryRange              ";
+  name[StatAnnotateUnpublishMemoryRange] = "  UnpublishMemoryRange            ";
+  name[StatAnnotateThreadName]           = "  ThreadName                      ";
+
+  name[StatMtxTotal]                     = "Contentionz                       ";
+  name[StatMtxTrace]                     = "  Trace                           ";
+  name[StatMtxThreads]                   = "  Threads                         ";
+  name[StatMtxReport]                    = "  Report                          ";
+  name[StatMtxSyncVar]                   = "  SyncVar                         ";
+  name[StatMtxSyncTab]                   = "  SyncTab                         ";
+  name[StatMtxSlab]                      = "  Slab                            ";
+  name[StatMtxAtExit]                    = "  Atexit                          ";
+  name[StatMtxAnnotations]               = "  Annotations                     ";
+  // Print one "label: value" line per counter, in StatType order.
+  Printf("Statistics:\n");
+  for (int i = 0; i < StatCnt; i++)
+    Printf("%s: %llu\n", name[i], stat[i]);
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_stat.h b/lib/tsan/rtl/tsan_stat.h
new file mode 100644
index 0000000..b660b48
--- /dev/null
+++ b/lib/tsan/rtl/tsan_stat.h
@@ -0,0 +1,245 @@
+//===-- tsan_stat.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_STAT_H
+#define TSAN_STAT_H
+
+namespace __tsan {
+
+enum StatType {
+  // Memory access processing related stuff.
+  StatMop,
+  StatMopRead,
+  StatMopWrite,
+  StatMop1,  // These must be consequtive.
+  StatMop2,
+  StatMop4,
+  StatMop8,
+  StatMopSame,
+  StatMopRange,
+  StatShadowProcessed,
+  StatShadowZero,
+  StatShadowNonZero,  // Derived.
+  StatShadowSameSize,
+  StatShadowIntersect,
+  StatShadowNotIntersect,
+  StatShadowSameThread,
+  StatShadowAnotherThread,
+  StatShadowReplace,
+
+  // Func processing.
+  StatFuncEnter,
+  StatFuncExit,
+
+  // Trace processing.
+  StatEvents,
+
+  // Threads.
+  StatThreadCreate,
+  StatThreadFinish,
+  StatThreadReuse,
+  StatThreadMaxTid,
+  StatThreadMaxAlive,
+
+  // Mutexes.
+  StatMutexCreate,
+  StatMutexDestroy,
+  StatMutexLock,
+  StatMutexUnlock,
+  StatMutexRecLock,
+  StatMutexRecUnlock,
+  StatMutexReadLock,
+  StatMutexReadUnlock,
+
+  // Synchronization.
+  StatSyncCreated,
+  StatSyncDestroyed,
+  StatSyncAcquire,
+  StatSyncRelease,
+
+  // Atomics.
+  StatAtomic,
+  StatAtomicLoad,
+  StatAtomicStore,
+  StatAtomicExchange,
+  StatAtomicFetchAdd,
+  StatAtomicCAS,
+  StatAtomicFence,
+  StatAtomicRelaxed,
+  StatAtomicConsume,
+  StatAtomicAcquire,
+  StatAtomicRelease,
+  StatAtomicAcq_Rel,
+  StatAtomicSeq_Cst,
+  StatAtomic1,
+  StatAtomic2,
+  StatAtomic4,
+  StatAtomic8,
+
+  // Interceptors.
+  StatInterceptor,
+  StatInt_malloc,
+  StatInt_calloc,
+  StatInt_realloc,
+  StatInt_free,
+  StatInt_cfree,
+  StatInt_mmap,
+  StatInt_mmap64,
+  StatInt_munmap,
+  StatInt_memalign,
+  StatInt_valloc,
+  StatInt_pvalloc,
+  StatInt_posix_memalign,
+  StatInt__Znwm,
+  StatInt__ZnwmRKSt9nothrow_t,
+  StatInt__Znam,
+  StatInt__ZnamRKSt9nothrow_t,
+  StatInt__ZdlPv,
+  StatInt__ZdlPvRKSt9nothrow_t,
+  StatInt__ZdaPv,
+  StatInt__ZdaPvRKSt9nothrow_t,
+  StatInt_strlen,
+  StatInt_memset,
+  StatInt_memcpy,
+  StatInt_strcmp,
+  StatInt_memchr,
+  StatInt_memrchr,
+  StatInt_memmove,
+  StatInt_memcmp,
+  StatInt_strchr,
+  StatInt_strchrnul,
+  StatInt_strrchr,
+  StatInt_strncmp,
+  StatInt_strcpy,
+  StatInt_strncpy,
+  StatInt_strstr,
+  StatInt_atexit,
+  StatInt___cxa_guard_acquire,
+  StatInt___cxa_guard_release,
+  StatInt_pthread_create,
+  StatInt_pthread_join,
+  StatInt_pthread_detach,
+  StatInt_pthread_mutex_init,
+  StatInt_pthread_mutex_destroy,
+  StatInt_pthread_mutex_lock,
+  StatInt_pthread_mutex_trylock,
+  StatInt_pthread_mutex_timedlock,
+  StatInt_pthread_mutex_unlock,
+  StatInt_pthread_spin_init,
+  StatInt_pthread_spin_destroy,
+  StatInt_pthread_spin_lock,
+  StatInt_pthread_spin_trylock,
+  StatInt_pthread_spin_unlock,
+  StatInt_pthread_rwlock_init,
+  StatInt_pthread_rwlock_destroy,
+  StatInt_pthread_rwlock_rdlock,
+  StatInt_pthread_rwlock_tryrdlock,
+  StatInt_pthread_rwlock_timedrdlock,
+  StatInt_pthread_rwlock_wrlock,
+  StatInt_pthread_rwlock_trywrlock,
+  StatInt_pthread_rwlock_timedwrlock,
+  StatInt_pthread_rwlock_unlock,
+  StatInt_pthread_cond_init,
+  StatInt_pthread_cond_destroy,
+  StatInt_pthread_cond_signal,
+  StatInt_pthread_cond_broadcast,
+  StatInt_pthread_cond_wait,
+  StatInt_pthread_cond_timedwait,
+  StatInt_pthread_barrier_init,
+  StatInt_pthread_barrier_destroy,
+  StatInt_pthread_barrier_wait,
+  StatInt_pthread_once,
+  StatInt_sem_init,
+  StatInt_sem_destroy,
+  StatInt_sem_wait,
+  StatInt_sem_trywait,
+  StatInt_sem_timedwait,
+  StatInt_sem_post,
+  StatInt_sem_getvalue,
+  StatInt_read,
+  StatInt_pread,
+  StatInt_pread64,
+  StatInt_readv,
+  StatInt_preadv64,
+  StatInt_write,
+  StatInt_pwrite,
+  StatInt_pwrite64,
+  StatInt_writev,
+  StatInt_pwritev64,
+  StatInt_send,
+  StatInt_sendmsg,
+  StatInt_recv,
+  StatInt_recvmsg,
+  StatInt_unlink,
+  StatInt_fopen,
+  StatInt_fread,
+  StatInt_fwrite,
+  StatInt_puts,
+  StatInt_rmdir,
+  StatInt_opendir,
+  StatInt_epoll_ctl,
+  StatInt_epoll_wait,
+  StatInt_sigaction,
+
+  // Dynamic annotations.
+  StatAnnotation,
+  StatAnnotateHappensBefore,
+  StatAnnotateHappensAfter,
+  StatAnnotateCondVarSignal,
+  StatAnnotateCondVarSignalAll,
+  StatAnnotateMutexIsNotPHB,
+  StatAnnotateCondVarWait,
+  StatAnnotateRWLockCreate,
+  StatAnnotateRWLockDestroy,
+  StatAnnotateRWLockAcquired,
+  StatAnnotateRWLockReleased,
+  StatAnnotateTraceMemory,
+  StatAnnotateFlushState,
+  StatAnnotateNewMemory,
+  StatAnnotateNoOp,
+  StatAnnotateFlushExpectedRaces,
+  StatAnnotateEnableRaceDetection,
+  StatAnnotateMutexIsUsedAsCondVar,
+  StatAnnotatePCQGet,
+  StatAnnotatePCQPut,
+  StatAnnotatePCQDestroy,
+  StatAnnotatePCQCreate,
+  StatAnnotateExpectRace,
+  StatAnnotateBenignRaceSized,
+  StatAnnotateBenignRace,
+  StatAnnotateIgnoreReadsBegin,
+  StatAnnotateIgnoreReadsEnd,
+  StatAnnotateIgnoreWritesBegin,
+  StatAnnotateIgnoreWritesEnd,
+  StatAnnotatePublishMemoryRange,
+  StatAnnotateUnpublishMemoryRange,
+  StatAnnotateThreadName,
+
+  // Internal mutex contentionz.
+  StatMtxTotal,
+  StatMtxTrace,
+  StatMtxThreads,
+  StatMtxReport,
+  StatMtxSyncVar,
+  StatMtxSyncTab,
+  StatMtxSlab,
+  StatMtxAnnotations,
+  StatMtxAtExit,
+
+  // This must be the last.
+  StatCnt,
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_STAT_H
diff --git a/lib/tsan/rtl/tsan_suppressions.cc b/lib/tsan/rtl/tsan_suppressions.cc
new file mode 100644
index 0000000..d743614
--- /dev/null
+++ b/lib/tsan/rtl/tsan_suppressions.cc
@@ -0,0 +1,173 @@
+//===-- tsan_suppressions.cc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_suppressions.h"
+#include "tsan_rtl.h"
+#include "tsan_flags.h"
+#include "tsan_mman.h"
+#include "tsan_platform.h"
+
+namespace __tsan {
+
+static Suppression *g_suppressions;
+
+static char *ReadFile(const char *filename) {
+  if (filename == 0 || filename[0] == 0)
+    return 0;
+  InternalScopedBuf<char> tmp(4*1024);
+  if (filename[0] == '/')
+    Snprintf(tmp, tmp.Size(), "%s", filename);
+  else
+    Snprintf(tmp, tmp.Size(), "%s/%s", internal_getpwd(), filename);
+  fd_t fd = internal_open(tmp, false);
+  if (fd == kInvalidFd) {
+    Printf("ThreadSanitizer: failed to open suppressions file '%s'\n",
+        tmp.Ptr());
+    Die();
+  }
+  const uptr fsize = internal_filesize(fd);
+  if (fsize == (uptr)-1) {
+    Printf("ThreadSanitizer: failed to stat suppressions file '%s'\n",
+        tmp.Ptr());
+    Die();
+  }
+  char *buf = (char*)internal_alloc(MBlockSuppression, fsize + 1);
+  if (fsize != internal_read(fd, buf, fsize)) {
+    Printf("ThreadSanitizer: failed to read suppressions file '%s'\n",
+        tmp.Ptr());
+    Die();
+  }
+  internal_close(fd);
+  buf[fsize] = 0;
+  return buf;
+}
+
+// True if str contains all '*'-separated pieces of templ, in order.
+bool SuppressionMatch(char *templ, const char *str) {
+  while (templ && templ[0]) {
+    if (templ[0] == '*') {  // wildcards carry no text of their own
+      templ++;
+      continue;
+    }
+    if (str[0] == 0)
+      return false;
+    char *tpos = (char*)internal_strchr(templ, '*');  // end of this piece
+    if (tpos != 0)
+      tpos[0] = 0;  // temporarily cut the template at the wildcard
+    const char *spos = internal_strstr(str, templ);
+    const uptr tlen = internal_strlen(templ);
+    templ = tpos;
+    if (tpos)
+      tpos[0] = '*';  // always restore the template before returning
+    if (spos == 0)
+      return false;
+    str = spos + tlen;  // only advance from a real match, never from null
+  }
+  return true;
+}
+
+Suppression *SuppressionParse(const char* supp) {
+  Suppression *head = 0;
+  const char *line = supp;
+  while (line) {
+    while (line[0] == ' ' || line[0] == '\t')
+      line++;
+    const char *end = internal_strchr(line, '\n');
+    if (end == 0)
+      end = line + internal_strlen(line);
+    if (line != end && line[0] != '#') {
+      const char *end2 = end;
+      while (line != end2 && (end2[-1] == ' ' || end2[-1] == '\t'))
+        end2--;
+      SuppressionType stype;
+      if (0 == internal_strncmp(line, "race:", sizeof("race:") - 1)) {
+        stype = SuppressionRace;
+        line += sizeof("race:") - 1;
+      } else if (0 == internal_strncmp(line, "thread:",
+          sizeof("thread:") - 1)) {
+        stype = SuppressionThread;
+        line += sizeof("thread:") - 1;
+      } else if (0 == internal_strncmp(line, "mutex:",
+          sizeof("mutex:") - 1)) {
+        stype = SuppressionMutex;
+        line += sizeof("mutex:") - 1;
+      } else if (0 == internal_strncmp(line, "signal:",
+          sizeof("signal:") - 1)) {
+        stype = SuppressionSignal;
+        line += sizeof("signal:") - 1;
+      } else {
+        Printf("ThreadSanitizer: failed to parse suppressions file\n");
+        Die();
+      }
+      Suppression *s = (Suppression*)internal_alloc(MBlockSuppression,
+          sizeof(Suppression));
+      s->next = head;
+      head = s;
+      s->type = stype;
+      s->func = (char*)internal_alloc(MBlockSuppression, end2 - line + 1);
+      internal_memcpy(s->func, line, end2 - line);
+      s->func[end2 - line] = 0;
+    }
+    if (end[0] == 0)
+      break;
+    line = end + 1;
+  }
+  return head;
+}
+
+void SuppressionFree(Suppression *supp) {  // frees each node and its pattern
+  for (Suppression *next; supp; supp = next) {
+    next = supp->next;  // read the link before the node is released
+    internal_free(supp->func);
+    internal_free(supp);
+  }
+}
+
+void InitializeSuppressions() {  // reads flags()->suppressions, if set
+  char *supp = ReadFile(flags()->suppressions);
+  g_suppressions = SuppressionParse(supp);  // entries hold their own copies
+  if (supp) internal_free(supp);  // so the raw file text can be released
+}
+
+void FinalizeSuppressions() {
+  SuppressionFree(g_suppressions);
+  g_suppressions = 0;
+}
+
+bool IsSuppressed(ReportType typ, const ReportStack *stack) {
+  if (g_suppressions == 0 || stack == 0)
+    return false;
+  SuppressionType stype;
+  if (typ == ReportTypeRace)
+    stype = SuppressionRace;
+  else if (typ == ReportTypeThreadLeak)
+    stype = SuppressionThread;
+  else if (typ == ReportTypeMutexDestroyLocked)
+    stype = SuppressionMutex;
+  else if (typ == ReportTypeSignalUnsafe)
+    stype = SuppressionSignal;
+  else
+    return false;
+  for (const ReportStack *frame = stack; frame; frame = frame->next) {
+    if (frame->func == 0)
+      continue;
+    for (Suppression *supp = g_suppressions; supp; supp = supp->next) {
+      if (stype == supp->type && SuppressionMatch(supp->func, frame->func)) {
+        DPrintf("ThreadSanitizer: matched suppression '%s'\n", supp->func);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_suppressions.h b/lib/tsan/rtl/tsan_suppressions.h
new file mode 100644
index 0000000..06d7307
--- /dev/null
+++ b/lib/tsan/rtl/tsan_suppressions.h
@@ -0,0 +1,43 @@
+//===-- tsan_suppressions.h -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_SUPPRESSIONS_H
+#define TSAN_SUPPRESSIONS_H
+
+#include "tsan_report.h"
+
+namespace __tsan {
+
+void InitializeSuppressions();
+void FinalizeSuppressions();
+bool IsSuppressed(ReportType typ, const ReportStack *stack);
+
+// Exposed for testing.
+enum SuppressionType {
+  SuppressionRace,
+  SuppressionMutex,
+  SuppressionThread,
+  SuppressionSignal,
+};
+
+struct Suppression {
+  Suppression *next;
+  SuppressionType type;
+  char *func;
+};
+Suppression *SuppressionParse(const char* supp);
+bool SuppressionMatch(char *templ, const char *str);
+void SuppressionFree(Suppression *supp);
+
+}  // namespace __tsan
+
+#endif  // TSAN_SUPPRESSIONS_H
diff --git a/lib/tsan/rtl/tsan_symbolize.h b/lib/tsan/rtl/tsan_symbolize.h
new file mode 100644
index 0000000..4eaec4a
--- /dev/null
+++ b/lib/tsan/rtl/tsan_symbolize.h
@@ -0,0 +1,26 @@
+//===-- tsan_symbolize.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_SYMBOLIZE_H
+#define TSAN_SYMBOLIZE_H
+
+#include "tsan_defs.h"
+#include "tsan_report.h"
+
+namespace __tsan {
+
+ReportStack *SymbolizeCode(uptr addr);
+ReportStack *SymbolizeData(uptr addr);
+
+}  // namespace __tsan
+
+#endif  // TSAN_SYMBOLIZE_H
diff --git a/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc b/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc
new file mode 100644
index 0000000..55b7c5d
--- /dev/null
+++ b/lib/tsan/rtl/tsan_symbolize_addr2line_linux.cc
@@ -0,0 +1,178 @@
+//===-- tsan_symbolize_addr2line_linux.cc -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_symbolize.h"
+#include "tsan_mman.h"
+#include "tsan_rtl.h"
+
+#include <unistd.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <link.h>
+#include <linux/limits.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+namespace __tsan {
+
+// Lazily forks /usr/bin/addr2line for the current executable and hands back
+// the pipe ends used to talk to it; returns false if the setup did not finish.
+static bool GetSymbolizerFd(int *infdp, int *outfdp) {
+  static int outfd[2];  // parent writes addresses to outfd[1]
+  static int infd[2];   // parent reads symbol text from infd[0]
+  static int pid = -1;
+  static int inited = 0;  // 0 - not started, -1 - in progress, 1 - ready
+  if (inited == 0) {
+    inited = -1;
+    if (pipe(outfd) || pipe(infd)) {
+      Printf("ThreadSanitizer: pipe() failed (%d)\n", errno);
+      Die();
+    }
+    pid = fork();
+    if (pid == 0) {
+      close(STDOUT_FILENO);
+      close(STDIN_FILENO);
+      dup2(outfd[0], STDIN_FILENO);
+      dup2(infd[1], STDOUT_FILENO);
+      close(outfd[0]);
+      close(outfd[1]);
+      close(infd[0]);
+      close(infd[1]);
+      InternalScopedBuf<char> exe(PATH_MAX);
+      ssize_t len = readlink("/proc/self/exe", exe, exe.Size() - 1);
+      if (len < 0)  // readlink() failed: there is no path to terminate
+        _exit(1);
+      exe.Ptr()[len] = 0;
+      execl("/usr/bin/addr2line", "/usr/bin/addr2line", "-Cfe", exe.Ptr(),
+          NULL);
+      _exit(1);  // execl() returns only on failure
+    } else if (pid < 0) {
+      Printf("ThreadSanitizer: failed to fork symbolizer\n");
+      Die();
+    }
+    close(outfd[0]);
+    close(infd[1]);
+    inited = 1;
+  } else if (inited > 0) {
+    int status = 0;
+    if (pid == waitpid(pid, &status, WNOHANG)) {
+      Printf("ThreadSanitizer: symbolizer died with status %d\n",
+          WEXITSTATUS(status));
+      Die();
+    }
+  }
+  *infdp = infd[0];
+  *outfdp = outfd[1];
+  return inited > 0;
+}
+
+static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *ctx) {
+  *(uptr*)ctx = (uptr)info->dlpi_addr;  // ctx points at the caller's uptr
+  return 1;  // non-zero stops the walk after the first reported object
+}
+
+static uptr GetImageBase() {
+  static uptr base = 0;  // cached; a result of 0 is looked up again next call
+  if (base == 0)
+    dl_iterate_phdr(dl_iterate_phdr_cb, &base);
+  return base;
+}
+
+// Resolves addr via the addr2line child; returns 0 if it is unavailable.
+ReportStack *SymbolizeCode(uptr addr) {
+  uptr offset = addr - GetImageBase();
+  int infd = -1;
+  int outfd = -1;
+  if (!GetSymbolizerFd(&infd, &outfd))
+    return 0;
+  char addrstr[32];
+  Snprintf(addrstr, sizeof(addrstr), "%p\n", (void*)offset);
+  if (0 >= write(outfd, addrstr, internal_strlen(addrstr))) {
+    Printf("ThreadSanitizer: can't write to symbolizer\n");
+    Die();
+  }
+  InternalScopedBuf<char> func(1024);
+  ssize_t len = read(infd, func, func.Size() - 1);
+  if (len <= 0) {
+    Printf("ThreadSanitizer: can't read from symbolizer\n");
+    Die();
+  }
+  func.Ptr()[len] = 0;
+  ReportStack *res = (ReportStack*)internal_alloc(MBlockReportStack,
+                                                  sizeof(ReportStack));
+  internal_memset(res, 0, sizeof(*res));
+  res->module = (char*)internal_alloc(MBlockReportStack, 4);
+  internal_memcpy(res->module, "exe", 4);
+  res->offset = offset;
+  res->pc = addr;
+  // Parsed as "<function>\n<file>:<line>"; a leading '?' means no symbol.
+  char *pos = strchr(func, '\n');
+  if (pos && func[0] != '?') {
+    res->func = (char*)internal_alloc(MBlockReportStack, pos - func + 1);
+    internal_memcpy(res->func, func, pos - func);
+    res->func[pos - func] = 0;
+    char *pos2 = strchr(pos, ':');
+    if (pos2) {
+      res->file = (char*)internal_alloc(MBlockReportStack, pos2 - pos - 1 + 1);
+      internal_memcpy(res->file, pos + 1, pos2 - pos - 1);
+      res->file[pos2 - pos - 1] = 0;
+      res->line = atoi(pos2 + 1);
+    }
+  }
+  return res;
+}
+
+ReportStack *SymbolizeData(uptr addr) {
+  return 0;
+  /*
+  if (base == 0)
+    base = GetImageBase();
+  int res = 0;
+  InternalScopedBuf<char> cmd(1024);
+  Snprintf(cmd, cmd.Size(),
+  "nm -alC %s|grep \"%lx\"|awk '{printf(\"%%s\\n%%s\", $3, $4)}' > tsan.tmp2",
+    exe, (addr - base));
+  if (system(cmd))
+    return 0;
+  FILE* f3 = fopen("tsan.tmp2", "rb");
+  if (f3) {
+    InternalScopedBuf<char> tmp(1024);
+    if (fread(tmp, 1, tmp.Size(), f3) <= 0)
+      return 0;
+    char *pos = strchr(tmp, '\n');
+    if (pos && tmp[0] != '?') {
+      res = 1;
+      symb[0].module = 0;
+      symb[0].offset = addr;
+      symb[0].name = alloc->Alloc<char>(pos - tmp + 1);
+      internal_memcpy(symb[0].name, tmp, pos - tmp);
+      symb[0].name[pos - tmp] = 0;
+      symb[0].file = 0;
+      symb[0].line = 0;
+      char *pos2 = strchr(pos, ':');
+      if (pos2) {
+        symb[0].file = alloc->Alloc<char>(pos2 - pos - 1 + 1);
+        internal_memcpy(symb[0].file, pos + 1, pos2 - pos - 1);
+        symb[0].file[pos2 - pos - 1] = 0;
+        symb[0].line = atoi(pos2 + 1);
+      }
+    }
+    fclose(f3);
+  }
+  return res;
+  */
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_sync.cc b/lib/tsan/rtl/tsan_sync.cc
new file mode 100644
index 0000000..0b31ab9
--- /dev/null
+++ b/lib/tsan/rtl/tsan_sync.cc
@@ -0,0 +1,177 @@
+//===-- tsan_sync.cc --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_sync.h"
+#include "tsan_placement_new.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+
+namespace __tsan {
+
+SyncVar::SyncVar(uptr addr)
+  : mtx(MutexTypeSyncVar, StatMtxSyncVar)
+  , addr(addr)
+  , owner_tid(kInvalidTid)
+  , recursion()
+  , is_rw()
+  , is_recursive()
+  , is_broken() {
+}
+
+SyncTab::Part::Part()
+  : mtx(MutexTypeSyncTab, StatMtxSyncTab)
+  , val() {
+}
+
+SyncTab::SyncTab() {
+}
+
+SyncTab::~SyncTab() {
+  for (int i = 0; i < kPartCount; i++) {
+    while (tab_[i].val) {
+      SyncVar *tmp = tab_[i].val;
+      tab_[i].val = tmp->next;
+      DestroyAndFree(tmp);
+    }
+  }
+}
+
+SyncVar* SyncTab::GetAndLock(ThreadState *thr, uptr pc,
+                             uptr addr, bool write_lock) {
+  Part *p = &tab_[PartIdx(addr)];
+  {
+    ReadLock l(&p->mtx);
+    for (SyncVar *res = p->val; res; res = res->next) {
+      if (res->addr == addr) {
+        if (write_lock)
+          res->mtx.Lock();
+        else
+          res->mtx.ReadLock();
+        return res;
+      }
+    }
+  }
+  {
+    Lock l(&p->mtx);
+    SyncVar *res = p->val;
+    for (; res; res = res->next) {
+      if (res->addr == addr)
+        break;
+    }
+    if (res == 0) {
+      StatInc(thr, StatSyncCreated);
+      void *mem = internal_alloc(MBlockSync, sizeof(SyncVar));
+      res = new(mem) SyncVar(addr);
+      res->creation_stack.ObtainCurrent(thr, pc);
+      res->next = p->val;
+      p->val = res;
+    }
+    if (write_lock)
+      res->mtx.Lock();
+    else
+      res->mtx.ReadLock();
+    return res;
+  }
+}
+
+SyncVar* SyncTab::GetAndRemove(ThreadState *thr, uptr pc, uptr addr) {
+  Part *p = &tab_[PartIdx(addr)];
+  SyncVar *res = 0;
+  {
+    Lock l(&p->mtx);
+    SyncVar **prev = &p->val;
+    res = *prev;
+    while (res) {
+      if (res->addr == addr) {
+        *prev = res->next;
+        break;
+      }
+      prev = &res->next;
+      res = *prev;
+    }
+  }
+  if (res) {
+    StatInc(thr, StatSyncDestroyed);
+    res->mtx.Lock();
+    res->mtx.Unlock();
+  }
+  return res;
+}
+
+int SyncTab::PartIdx(uptr addr) {
+  return (addr >> 3) % kPartCount;
+}
+
+StackTrace::StackTrace()
+    : n_()
+    , s_() {
+}
+
+StackTrace::~StackTrace() {
+  Reset();
+}
+
+void StackTrace::Reset() {
+  if (s_) {
+    CHECK_NE(n_, 0);
+    internal_free(s_);
+    s_ = 0;
+    n_ = 0;
+  }
+}
+
+void StackTrace::Init(const uptr *pcs, uptr cnt) {
+  Reset();
+  if (cnt == 0)
+    return;
+  n_ = cnt;
+  s_ = (uptr*)internal_alloc(MBlockStackTrace, cnt * sizeof(s_[0]));
+  internal_memcpy(s_, pcs, cnt * sizeof(s_[0]));
+}
+
+void StackTrace::ObtainCurrent(ThreadState *thr, uptr toppc) {
+  Reset();
+  n_ = thr->shadow_stack_pos - &thr->shadow_stack[0];
+  if (n_ + !!toppc == 0)
+    return;
+  s_ = (uptr*)internal_alloc(MBlockStackTrace, (n_ + !!toppc) * sizeof(s_[0]));
+  for (uptr i = 0; i < n_; i++)
+    s_[i] = thr->shadow_stack[i];
+  if (toppc) {
+    s_[n_] = toppc;
+    n_++;
+  }
+}
+
+void StackTrace::CopyFrom(const StackTrace& other) {
+  Reset();
+  Init(other.Begin(), other.Size());
+}
+
+bool StackTrace::IsEmpty() const {
+  return n_ == 0;
+}
+
+uptr StackTrace::Size() const {
+  return n_;
+}
+
+uptr StackTrace::Get(uptr i) const {
+  CHECK_LT(i, n_);
+  return s_[i];
+}
+
+const uptr *StackTrace::Begin() const {
+  return s_;
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_sync.h b/lib/tsan/rtl/tsan_sync.h
new file mode 100644
index 0000000..45251f0
--- /dev/null
+++ b/lib/tsan/rtl/tsan_sync.h
@@ -0,0 +1,97 @@
+//===-- tsan_sync.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_SYNC_H
+#define TSAN_SYNC_H
+
+#include "tsan_atomic.h"
+#include "tsan_clock.h"
+#include "tsan_defs.h"
+#include "tsan_mutex.h"
+
+namespace __tsan {
+
+class SlabCache;
+
+class StackTrace {
+ public:
+  StackTrace();
+  ~StackTrace();
+  void Reset();
+
+  void Init(const uptr *pcs, uptr cnt);
+  void ObtainCurrent(ThreadState *thr, uptr toppc);
+  bool IsEmpty() const;
+  uptr Size() const;
+  uptr Get(uptr i) const;
+  const uptr *Begin() const;
+  void CopyFrom(const StackTrace& other);
+
+ private:
+  uptr n_;
+  uptr *s_;
+
+  StackTrace(const StackTrace&);
+  void operator = (const StackTrace&);
+};
+
+struct SyncVar {
+  explicit SyncVar(uptr addr);
+
+  static const int kInvalidTid = -1;
+
+  Mutex mtx;
+  const uptr addr;
+  SyncClock clock;
+  StackTrace creation_stack;
+  SyncClock read_clock;  // Used for rw mutexes only.
+  int owner_tid;  // Set only by exclusive owners.
+  int recursion;
+  bool is_rw;
+  bool is_recursive;
+  bool is_broken;
+  SyncVar *next;  // In SyncTab hashtable.
+};
+
+class SyncTab {
+ public:
+  SyncTab();
+  ~SyncTab();
+
+  // If the SyncVar does not exist yet, it is created.
+  SyncVar* GetAndLock(ThreadState *thr, uptr pc,
+                      uptr addr, bool write_lock);
+
+  // If the SyncVar does not exist, returns 0.
+  SyncVar* GetAndRemove(ThreadState *thr, uptr pc, uptr addr);
+
+ private:
+  struct Part {
+    Mutex mtx;
+    SyncVar *val;
+    char pad[kCacheLineSize - sizeof(Mutex) - sizeof(SyncVar*)];  // NOLINT
+    Part();
+  };
+
+  // FIXME: Implement something more sane.
+  static const int kPartCount = 1009;
+  Part tab_[kPartCount];
+
+  int PartIdx(uptr addr);
+
+  SyncTab(const SyncTab&);  // Not implemented.
+  void operator = (const SyncTab&);  // Not implemented.
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_SYNC_H
diff --git a/lib/tsan/rtl/tsan_trace.h b/lib/tsan/rtl/tsan_trace.h
new file mode 100644
index 0000000..4a1930d
--- /dev/null
+++ b/lib/tsan/rtl/tsan_trace.h
@@ -0,0 +1,59 @@
+//===-- tsan_trace.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_TRACE_H
+#define TSAN_TRACE_H
+
+#include "tsan_defs.h"
+#include "tsan_mutex.h"
+#include "tsan_sync.h"
+
+namespace __tsan {
+
+const int kTraceParts = 8;
+const int kTraceSize = 1024*1024;
+const int kTracePartSize = kTraceSize / kTraceParts;
+
+// Must fit into 3 bits.
+enum EventType {
+  EventTypeMop,
+  EventTypeFuncEnter,
+  EventTypeFuncExit,
+  EventTypeLock,
+  EventTypeUnlock,
+  EventTypeRLock,
+  EventTypeRUnlock,
+};
+
+// Represents a thread event (from most significant bit):
+// u64 typ  : 3;   // EventType.
+// u64 addr : 61;  // Associated pc.
+typedef u64 Event;
+
+struct TraceHeader {
+  StackTrace stack0;  // Start stack for the trace.
+  u64   epoch0;       // Start epoch for the trace.
+};
+
+struct Trace {
+  Event events[kTraceSize];
+  TraceHeader headers[kTraceParts];
+  Mutex mtx;
+
+  Trace()
+    : mtx(MutexTypeTrace, StatMtxTrace) {
+  }
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_TRACE_H
diff --git a/lib/tsan/rtl/tsan_update_shadow_word_inl.h b/lib/tsan/rtl/tsan_update_shadow_word_inl.h
new file mode 100644
index 0000000..c7864ce
--- /dev/null
+++ b/lib/tsan/rtl/tsan_update_shadow_word_inl.h
@@ -0,0 +1,79 @@
+//===-- tsan_update_shadow_word_inl.h ---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Body of the hottest inner loop.
+// If we wrap this body into a function, compilers (both gcc and clang)
+// produce slightly less efficient code.
+//===----------------------------------------------------------------------===//
+do {
+  StatInc(thr, StatShadowProcessed);
+  const unsigned kAccessSize = 1 << kAccessSizeLog;
+  unsigned off = cur.ComputeSearchOffset();
+  u64 *sp = &shadow_mem[(idx + off) % kShadowCnt];
+  old = LoadShadow(sp);
+  if (old.IsZero()) {
+    StatInc(thr, StatShadowZero);
+    if (store_word)
+      StoreIfNotYetStored(sp, &store_word);
+    // The above StoreIfNotYetStored could be done unconditionally
+    // and it even shows 4% gain on synthetic benchmarks (r4307).
+    break;
+  }
+  // is the memory access equal to the previous?
+  if (Shadow::Addr0AndSizeAreEqual(cur, old)) {
+    StatInc(thr, StatShadowSameSize);
+    // same thread?
+    if (Shadow::TidsAreEqual(old, cur)) {
+      StatInc(thr, StatShadowSameThread);
+      if (OldIsInSameSynchEpoch(old, thr)) {
+        if (OldIsRWStronger(old, kAccessIsWrite)) {
+          // found a slot that holds effectively the same info
+          // (that is, same tid, same sync epoch and same size)
+          StatInc(thr, StatMopSame);
+          return;
+        }
+        StoreIfNotYetStored(sp, &store_word);
+        break;
+      }
+      if (OldIsRWWeaker(old, kAccessIsWrite))
+        StoreIfNotYetStored(sp, &store_word);
+      break;
+    }
+    StatInc(thr, StatShadowAnotherThread);
+    if (HappensBefore(old, thr)) {
+      StoreIfNotYetStored(sp, &store_word);
+      break;
+    }
+    if (BothReads(old, kAccessIsWrite))
+      break;
+    goto RACE;
+  }
+
+  // Do the memory accesses intersect?
+  if (Shadow::TwoRangesIntersect(old, cur, kAccessSize)) {
+    StatInc(thr, StatShadowIntersect);
+    if (Shadow::TidsAreEqual(old, cur)) {
+      StatInc(thr, StatShadowSameThread);
+      break;
+    }
+    StatInc(thr, StatShadowAnotherThread);
+    if (HappensBefore(old, thr))
+      break;
+
+    if (BothReads(old, kAccessIsWrite))
+      break;
+
+    goto RACE;
+  }
+  // The accesses do not intersect.
+  StatInc(thr, StatShadowNotIntersect);
+  break;
+} while (0);
diff --git a/lib/tsan/rtl/tsan_vector.h b/lib/tsan/rtl/tsan_vector.h
new file mode 100644
index 0000000..d41063d
--- /dev/null
+++ b/lib/tsan/rtl/tsan_vector.h
@@ -0,0 +1,110 @@
+//===-- tsan_vector.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+// Low-fat STL-like vector container.
+
+#ifndef TSAN_VECTOR_H
+#define TSAN_VECTOR_H
+
+#include "tsan_defs.h"
+#include "tsan_mman.h"
+
+namespace __tsan {
+
+// Elements are relocated with memcpy and never destroyed: POD-like T only.
+template<typename T>
+class Vector {
+ public:
+  explicit Vector(MBlockType typ)
+      : typ_(typ)
+      , begin_()
+      , end_()
+      , last_() {
+  }
+
+  ~Vector() {
+    Reset();
+  }
+
+  void Reset() {  // releases the storage and returns to the empty state
+    if (begin_)
+      internal_free(begin_);
+    begin_ = 0;
+    end_ = 0;
+    last_ = 0;
+  }
+
+  uptr Size() const {
+    return end_ - begin_;
+  }
+
+  T &operator[](uptr i) {
+    DCHECK_LT(i, end_ - begin_);
+    return begin_[i];
+  }
+
+  const T &operator[](uptr i) const {
+    DCHECK_LT(i, end_ - begin_);
+    return begin_[i];
+  }
+
+  T *PushBack(T v = T()) {  // appends v and returns the address of the new slot
+    EnsureSize(Size() + 1);
+    end_[-1] = v;
+    return &end_[-1];
+  }
+
+  void Resize(uptr size) {  // new elements are T(); shrinking keeps capacity
+    uptr old_size = Size();
+    EnsureSize(size);  // grows only, a no-op when size <= old_size
+    for (uptr i = old_size; i < size; i++)
+      begin_[i] = T();
+    if (size < old_size)
+      end_ = begin_ + size;
+  }
+
+ private:
+  const MBlockType typ_;
+  T *begin_;  // start of the allocation
+  T *end_;    // one past the last element in use
+  T *last_;   // one past the end of the allocation (capacity)
+
+  void EnsureSize(uptr size) {  // grows Size() to at least size, never shrinks
+    if (size <= Size())
+      return;
+    if (size <= (uptr)(last_ - begin_)) {
+      end_ = begin_ + size;
+      return;
+    }
+    uptr cap0 = last_ - begin_;
+    uptr cap = 2 * cap0;  // double, starting from 16, but cover size at least
+    if (cap == 0)
+      cap = 16;
+    if (cap < size)
+      cap = size;
+    T *p = (T*)internal_alloc(typ_, cap * sizeof(T));
+    if (cap0) {
+      internal_memcpy(p, begin_, cap0 * sizeof(T));
+      internal_free(begin_);
+    }
+    begin_ = p;
+    end_ = begin_ + size;
+    last_ = begin_ + cap;
+  }
+
+  Vector(const Vector&);
+  void operator=(const Vector&);
+};
+}
+
+#endif  // #ifndef TSAN_VECTOR_H
diff --git a/lib/tsan/rtl_tests/tsan_bench.cc b/lib/tsan/rtl_tests/tsan_bench.cc
new file mode 100644
index 0000000..9bdbe61
--- /dev/null
+++ b/lib/tsan/rtl_tests/tsan_bench.cc
@@ -0,0 +1,105 @@
+//===-- tsan_bench.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_test_util.h"
+#include "tsan_interface.h"
+#include "tsan_defs.h"
+#include "gtest/gtest.h"
+#include <stdint.h>
+
+const int kSize = 128;
+const int kRepeat = 2*1024*1024;
+
+void noinstr(void *p) {}
+
+template<typename T, void(*__tsan_mop)(void *p)>
+static void Benchmark() {  // kRepeat passes of __tsan_mop + increment per slot.
+  volatile T data[kSize] = {};  // Zeroed: data[j]++ must not read garbage.
+  for (int i = 0; i < kRepeat; i++) {
+    for (int j = 0; j < kSize; j++) {
+      __tsan_mop((void*)&data[j]);  // Report the access, then perform it.
+      data[j]++;
+    }
+  }
+}
+
+TEST(DISABLED_BENCH, Mop1) {
+  Benchmark<uint8_t, noinstr>();
+}
+
+TEST(DISABLED_BENCH, Mop1Read) {
+  Benchmark<uint8_t, __tsan_read1>();
+}
+
+TEST(DISABLED_BENCH, Mop1Write) {
+  Benchmark<uint8_t, __tsan_write1>();
+}
+
+TEST(DISABLED_BENCH, Mop2) {
+  Benchmark<uint16_t, noinstr>();
+}
+
+TEST(DISABLED_BENCH, Mop2Read) {
+  Benchmark<uint16_t, __tsan_read2>();
+}
+
+TEST(DISABLED_BENCH, Mop2Write) {
+  Benchmark<uint16_t, __tsan_write2>();
+}
+
+TEST(DISABLED_BENCH, Mop4) {
+  Benchmark<uint32_t, noinstr>();
+}
+
+TEST(DISABLED_BENCH, Mop4Read) {
+  Benchmark<uint32_t, __tsan_read4>();
+}
+
+TEST(DISABLED_BENCH, Mop4Write) {
+  Benchmark<uint32_t, __tsan_write4>();
+}
+
+TEST(DISABLED_BENCH, Mop8) {
+  Benchmark<uint64_t, noinstr>();  // 8-byte baseline, like Mop8Read/Mop8Write.
+}
+
+TEST(DISABLED_BENCH, Mop8Read) {
+  Benchmark<uint64_t, __tsan_read8>();
+}
+
+TEST(DISABLED_BENCH, Mop8Write) {
+  Benchmark<uint64_t, __tsan_write8>();
+}
+
+TEST(DISABLED_BENCH, FuncCall) {
+  for (int i = 0; i < kRepeat; i++) {
+    for (int j = 0; j < kSize; j++)
+      __tsan_func_entry((void*)(uintptr_t)j);
+    for (int j = 0; j < kSize; j++)
+      __tsan_func_exit();
+  }
+}
+
+TEST(DISABLED_BENCH, MutexLocal) {
+  Mutex m;
+  ScopedThread().Create(m);
+  for (int i = 0; i < 50; i++) {
+    ScopedThread t;
+    t.Lock(m);
+    t.Unlock(m);
+  }
+  for (int i = 0; i < 16*1024*1024; i++) {
+    m.Lock();
+    m.Unlock();
+  }
+  ScopedThread().Destroy(m);
+}
diff --git a/lib/tsan/rtl_tests/tsan_mop.cc b/lib/tsan/rtl_tests/tsan_mop.cc
new file mode 100644
index 0000000..79bfc28
--- /dev/null
+++ b/lib/tsan/rtl_tests/tsan_mop.cc
@@ -0,0 +1,231 @@
+//===-- tsan_mop.cc ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_interface.h"
+#include "tsan_test_util.h"
+#include "gtest/gtest.h"
+#include <stddef.h>
+#include <stdint.h>
+
+TEST(ThreadSanitizer, SimpleWrite) {
+  ScopedThread t;
+  MemLoc l;
+  t.Write1(l);
+}
+
+TEST(ThreadSanitizer, SimpleWriteWrite) {
+  ScopedThread t1, t2;
+  MemLoc l1, l2;
+  t1.Write1(l1);
+  t2.Write1(l2);
+}
+
+TEST(ThreadSanitizer, WriteWriteRace) {
+  ScopedThread t1, t2;
+  MemLoc l;
+  t1.Write1(l);
+  t2.Write1(l, true);
+}
+
+TEST(ThreadSanitizer, ReadWriteRace) {
+  ScopedThread t1, t2;
+  MemLoc l;
+  t1.Read1(l);
+  t2.Write1(l, true);
+}
+
+TEST(ThreadSanitizer, WriteReadRace) {
+  ScopedThread t1, t2;
+  MemLoc l;
+  t1.Write1(l);
+  t2.Read1(l, true);
+}
+
+TEST(ThreadSanitizer, ReadReadNoRace) {
+  ScopedThread t1, t2;
+  MemLoc l;
+  t1.Read1(l);
+  t2.Read1(l);
+}
+
+TEST(ThreadSanitizer, WriteThenRead) {
+  MemLoc l;
+  ScopedThread t1, t2;
+  t1.Write1(l);
+  t1.Read1(l);
+  t2.Read1(l, true);
+}
+
+TEST(ThreadSanitizer, WriteThenLockedRead) {
+  Mutex m(Mutex::RW);
+  MainThread t0;
+  t0.Create(m);
+  MemLoc l;
+  {
+    ScopedThread t1, t2;
+
+    t1.Write8(l);
+
+    t1.Lock(m);
+    t1.Read8(l);
+    t1.Unlock(m);
+
+    t2.Read8(l, true);
+  }
+  t0.Destroy(m);
+}
+
+TEST(ThreadSanitizer, LockedWriteThenRead) {
+  Mutex m(Mutex::RW);
+  MainThread t0;
+  t0.Create(m);
+  MemLoc l;
+  {
+    ScopedThread t1, t2;
+
+    t1.Lock(m);
+    t1.Write8(l);
+    t1.Unlock(m);
+
+    t1.Read8(l);
+
+    t2.Read8(l, true);
+  }
+  t0.Destroy(m);
+}
+
+
+TEST(ThreadSanitizer, RaceWithOffset) {
+  ScopedThread t1, t2;
+  {
+    MemLoc l;
+    t1.Access(l.loc(), true, 8, false);
+    t2.Access((char*)l.loc() + 4, true, 4, true);
+  }
+  {
+    MemLoc l;
+    t1.Access(l.loc(), true, 8, false);
+    t2.Access((char*)l.loc() + 7, true, 1, true);
+  }
+  {
+    MemLoc l;
+    t1.Access((char*)l.loc() + 4, true, 4, false);
+    t2.Access((char*)l.loc() + 4, true, 2, true);
+  }
+  {
+    MemLoc l;
+    t1.Access((char*)l.loc() + 4, true, 4, false);
+    t2.Access((char*)l.loc() + 6, true, 2, true);
+  }
+  {
+    MemLoc l;
+    t1.Access((char*)l.loc() + 3, true, 2, false);
+    t2.Access((char*)l.loc() + 4, true, 1, true);
+  }
+  {
+    MemLoc l;
+    t1.Access((char*)l.loc() + 1, true, 8, false);
+    t2.Access((char*)l.loc() + 3, true, 1, true);
+  }
+}
+
+TEST(ThreadSanitizer, RaceWithOffset2) {
+  ScopedThread t1, t2;
+  {
+    MemLoc l;
+    t1.Access((char*)l.loc(), true, 4, false);
+    t2.Access((char*)l.loc() + 2, true, 1, true);
+  }
+  {
+    MemLoc l;
+    t1.Access((char*)l.loc() + 2, true, 1, false);
+    t2.Access((char*)l.loc(), true, 4, true);
+  }
+}
+
+TEST(ThreadSanitizer, NoRaceWithOffset) {
+  ScopedThread t1, t2;
+  {
+    MemLoc l;
+    t1.Access(l.loc(), true, 4, false);
+    t2.Access((char*)l.loc() + 4, true, 4, false);
+  }
+  {
+    MemLoc l;
+    t1.Access((char*)l.loc() + 3, true, 2, false);
+    t2.Access((char*)l.loc() + 1, true, 2, false);
+    t2.Access((char*)l.loc() + 5, true, 2, false);
+  }
+}
+
+TEST(ThreadSanitizer, RaceWithDeadThread) {
+  MemLoc l;
+  ScopedThread t;
+  ScopedThread().Write1(l);
+  t.Write1(l, true);
+}
+
+TEST(ThreadSanitizer, BenignRaceOnVptr) {
+  void *vptr_storage;
+  MemLoc vptr(&vptr_storage), val;
+  vptr_storage = val.loc();
+  ScopedThread t1, t2;
+  t1.VptrUpdate(vptr, val);
+  t2.Read8(vptr);
+}
+
+TEST(ThreadSanitizer, HarmfulRaceOnVptr) {
+  void *vptr_storage;
+  MemLoc vptr(&vptr_storage), val1, val2;
+  vptr_storage = val1.loc();
+  ScopedThread t1, t2;
+  t1.VptrUpdate(vptr, val2);
+  t2.Read8(vptr, true);
+}
+
+static void foo() {
+  volatile int x = 42;
+  (void)x;
+}
+
+static void bar() {
+  volatile int x = 43;
+  (void)x;
+}
+
+TEST(ThreadSanitizer, ReportDeadThread) {
+  MemLoc l;
+  ScopedThread t1;
+  {
+    ScopedThread t2;
+    t2.Call(&foo);
+    t2.Call(&bar);
+    t2.Write1(l);
+  }
+  t1.Write1(l, true);
+}
+
+struct ClassWithStatic {
+  static int Data[4];
+};
+
+int ClassWithStatic::Data[4];
+
+static void foobarbaz() {}
+
+TEST(ThreadSanitizer, ReportRace) {
+  ScopedThread t1;
+  MainThread().Access(&ClassWithStatic::Data, true, 4, false);
+  t1.Call(&foobarbaz);
+  t1.Access(&ClassWithStatic::Data, true, 2, true);
+  t1.Return();
+}
diff --git a/lib/tsan/rtl_tests/tsan_mutex.cc b/lib/tsan/rtl_tests/tsan_mutex.cc
new file mode 100644
index 0000000..47495f4
--- /dev/null
+++ b/lib/tsan/rtl_tests/tsan_mutex.cc
@@ -0,0 +1,221 @@
+//===-- tsan_mutex.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_atomic.h"
+#include "tsan_interface.h"
+#include "tsan_interface_ann.h"
+#include "tsan_test_util.h"
+#include "gtest/gtest.h"
+#include <stdint.h>
+
+namespace __tsan {
+
+TEST(ThreadSanitizer, BasicMutex) {
+  ScopedThread t;
+  Mutex m;
+  t.Create(m);
+
+  t.Lock(m);
+  t.Unlock(m);
+
+  CHECK(t.TryLock(m));
+  t.Unlock(m);
+
+  t.Lock(m);
+  CHECK(!t.TryLock(m));
+  t.Unlock(m);
+
+  t.Destroy(m);
+}
+
+TEST(ThreadSanitizer, BasicSpinMutex) {
+  ScopedThread t;
+  Mutex m(Mutex::Spin);
+  t.Create(m);
+
+  t.Lock(m);
+  t.Unlock(m);
+
+  CHECK(t.TryLock(m));
+  t.Unlock(m);
+
+  t.Lock(m);
+  CHECK(!t.TryLock(m));
+  t.Unlock(m);
+
+  t.Destroy(m);
+}
+
+TEST(ThreadSanitizer, BasicRwMutex) {
+  ScopedThread t;
+  Mutex m(Mutex::RW);
+  t.Create(m);
+
+  t.Lock(m);
+  t.Unlock(m);
+
+  CHECK(t.TryLock(m));
+  t.Unlock(m);
+
+  t.Lock(m);
+  CHECK(!t.TryLock(m));
+  t.Unlock(m);
+
+  t.ReadLock(m);
+  t.ReadUnlock(m);
+
+  CHECK(t.TryReadLock(m));
+  t.ReadUnlock(m);
+
+  t.Lock(m);
+  CHECK(!t.TryReadLock(m));
+  t.Unlock(m);
+
+  t.ReadLock(m);
+  CHECK(!t.TryLock(m));
+  t.ReadUnlock(m);
+
+  t.ReadLock(m);
+  CHECK(t.TryReadLock(m));
+  t.ReadUnlock(m);
+  t.ReadUnlock(m);
+
+  t.Destroy(m);
+}
+
+TEST(ThreadSanitizer, Mutex) {
+  Mutex m;
+  MainThread t0;
+  t0.Create(m);
+
+  ScopedThread t1, t2;
+  MemLoc l;
+  t1.Lock(m);
+  t1.Write1(l);
+  t1.Unlock(m);
+  t2.Lock(m);
+  t2.Write1(l);
+  t2.Unlock(m);
+  t2.Destroy(m);
+}
+
+TEST(ThreadSanitizer, SpinMutex) {
+  Mutex m(Mutex::Spin);
+  MainThread t0;
+  t0.Create(m);
+
+  ScopedThread t1, t2;
+  MemLoc l;
+  t1.Lock(m);
+  t1.Write1(l);
+  t1.Unlock(m);
+  t2.Lock(m);
+  t2.Write1(l);
+  t2.Unlock(m);
+  t2.Destroy(m);
+}
+
+TEST(ThreadSanitizer, RwMutex) {
+  Mutex m(Mutex::RW);
+  MainThread t0;
+  t0.Create(m);
+
+  ScopedThread t1, t2, t3;
+  MemLoc l;
+  t1.Lock(m);
+  t1.Write1(l);
+  t1.Unlock(m);
+  t2.Lock(m);
+  t2.Write1(l);
+  t2.Unlock(m);
+  t1.ReadLock(m);
+  t3.ReadLock(m);
+  t1.Read1(l);
+  t3.Read1(l);
+  t1.ReadUnlock(m);
+  t3.ReadUnlock(m);
+  t2.Lock(m);
+  t2.Write1(l);
+  t2.Unlock(m);
+  t2.Destroy(m);
+}
+
+TEST(ThreadSanitizer, StaticMutex) {
+  // Emulates statically initialized mutex.
+  Mutex m;
+  m.StaticInit();
+  {
+    ScopedThread t1, t2;
+    t1.Lock(m);
+    t1.Unlock(m);
+    t2.Lock(m);
+    t2.Unlock(m);
+  }
+  MainThread().Destroy(m);
+}
+
+static void *singleton_thread(void *param) {
+  atomic_uintptr_t *singleton = (atomic_uintptr_t *)param;
+  for (int i = 0; i < 4*1024*1024; i++) {
+    int *val = (int *)atomic_load(singleton, memory_order_acquire);
+    __tsan_acquire(singleton);
+    __tsan_read4(val);
+    CHECK_EQ(*val, 42);
+  }
+  return 0;
+}
+
+TEST(DISABLED_BENCH_ThreadSanitizer, Singleton) {
+  const int kClockSize = 100;
+  const int kThreadCount = 8;
+
+  // Puff off thread's clock.
+  for (int i = 0; i < kClockSize; i++) {
+    ScopedThread t1;
+    (void)t1;
+  }
+  // Create the singleton.
+  int val = 42;
+  __tsan_write4(&val);
+  atomic_uintptr_t singleton;
+  __tsan_release(&singleton);
+  atomic_store(&singleton, (uintptr_t)&val, memory_order_release);
+  // Create reader threads.
+  pthread_t threads[kThreadCount];
+  for (int t = 0; t < kThreadCount; t++)
+    pthread_create(&threads[t], 0, singleton_thread, &singleton);
+  for (int t = 0; t < kThreadCount; t++)
+    pthread_join(threads[t], 0);
+}
+
+TEST(DISABLED_BENCH_ThreadSanitizer, StopFlag) {
+  const int kClockSize = 100;
+  const int kIters = 16*1024*1024;
+
+  // Puff off thread's clock.
+  for (int i = 0; i < kClockSize; i++) {
+    ScopedThread t1;
+    (void)t1;
+  }
+  // Create the stop flag.
+  atomic_uintptr_t flag;
+  __tsan_release(&flag);
+  atomic_store(&flag, 0, memory_order_release);
+  // Read it a lot.
+  for (int i = 0; i < kIters; i++) {
+    uptr v = atomic_load(&flag, memory_order_acquire);
+    __tsan_acquire(&flag);
+    CHECK_EQ(v, 0);
+  }
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl_tests/tsan_posix.cc b/lib/tsan/rtl_tests/tsan_posix.cc
new file mode 100644
index 0000000..4f98d50
--- /dev/null
+++ b/lib/tsan/rtl_tests/tsan_posix.cc
@@ -0,0 +1,146 @@
+//===-- tsan_posix.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_interface.h"
+#include "tsan_test_util.h"
+#include "gtest/gtest.h"
+#include <pthread.h>
+
+struct thread_key {
+  pthread_key_t key;
+  pthread_mutex_t *mtx;
+  int val;
+  int *cnt;
+  thread_key(pthread_key_t key, pthread_mutex_t *mtx, int val, int *cnt)
+    : key(key)
+    , mtx(mtx)
+    , val(val)
+    , cnt(cnt) {
+  }
+};
+
+static void thread_secific_dtor(void *v) {  // Registered as the key's dtor.
+  thread_key *k = (thread_key *)v;
+  EXPECT_EQ(pthread_mutex_lock(k->mtx), 0);
+  (*k->cnt)++;  // Count this destructor invocation under the mutex.
+  __tsan_write4(&k->cnt);  // NOTE(review): annotates the pointer, not *cnt?
+  EXPECT_EQ(pthread_mutex_unlock(k->mtx), 0);
+  if (k->val == 42) {
+    delete k;  // Final round for this thread: release the key data.
+  } else if (k->val == 43 || k->val == 44) {
+    k->val--;
+    EXPECT_EQ(pthread_setspecific(k->key, k), 0);  // Store the value again.
+  } else {
+    ASSERT_TRUE(false);  // Any other val is unexpected.
+  }
+}
+
+static void *dtors_thread(void *p) {
+  thread_key *k = (thread_key *)p;
+  EXPECT_EQ(pthread_setspecific(k->key, k), 0);
+  return 0;
+}
+
+TEST(Posix, ThreadSpecificDtors) {
+  int cnt = 0;
+  pthread_key_t key;
+  EXPECT_EQ(pthread_key_create(&key, thread_secific_dtor), 0);
+  pthread_mutex_t mtx;
+  EXPECT_EQ(pthread_mutex_init(&mtx, 0), 0);
+  pthread_t th[3];
+  thread_key *k[3];
+  k[0] = new thread_key(key, &mtx, 42, &cnt);
+  k[1] = new thread_key(key, &mtx, 43, &cnt);
+  k[2] = new thread_key(key, &mtx, 44, &cnt);
+  EXPECT_EQ(pthread_create(&th[0], 0, dtors_thread, k[0]), 0);
+  EXPECT_EQ(pthread_create(&th[1], 0, dtors_thread, k[1]), 0);
+  EXPECT_EQ(pthread_join(th[0], 0), 0);
+  EXPECT_EQ(pthread_create(&th[2], 0, dtors_thread, k[2]), 0);
+  EXPECT_EQ(pthread_join(th[1], 0), 0);
+  EXPECT_EQ(pthread_join(th[2], 0), 0);
+  EXPECT_EQ(pthread_key_delete(key), 0);
+  EXPECT_EQ(6, cnt);
+}
+
+static __thread int local_var;
+
+static void *local_thread(void *p) {
+  __tsan_write1(&local_var);
+  __tsan_write1(&p);
+  if (p == 0)
+    return 0;
+  const int kThreads = 4;
+  pthread_t th[kThreads];
+  for (int i = 0; i < kThreads; i++)
+    EXPECT_EQ(pthread_create(&th[i], 0, local_thread,
+              (void*)((long)p - 1)), 0);  // NOLINT
+  for (int i = 0; i < kThreads; i++)
+    EXPECT_EQ(pthread_join(th[i], 0), 0);
+  return 0;
+}
+
+TEST(Posix, ThreadLocalAccesses) {
+  local_thread((void*)2);
+}
+
+struct CondContext {
+  pthread_mutex_t m;
+  pthread_cond_t c;
+  int data;
+};
+
+static void *cond_thread(void *p) {
+  CondContext &ctx = *static_cast<CondContext*>(p);
+
+  EXPECT_EQ(pthread_mutex_lock(&ctx.m), 0);
+  EXPECT_EQ(ctx.data, 0);
+  ctx.data = 1;
+  EXPECT_EQ(pthread_cond_signal(&ctx.c), 0);
+  EXPECT_EQ(pthread_mutex_unlock(&ctx.m), 0);
+
+  EXPECT_EQ(pthread_mutex_lock(&ctx.m), 0);
+  while (ctx.data != 2)
+    EXPECT_EQ(pthread_cond_wait(&ctx.c, &ctx.m), 0);
+  EXPECT_EQ(pthread_mutex_unlock(&ctx.m), 0);
+
+  EXPECT_EQ(pthread_mutex_lock(&ctx.m), 0);
+  ctx.data = 3;
+  EXPECT_EQ(pthread_cond_broadcast(&ctx.c), 0);
+  EXPECT_EQ(pthread_mutex_unlock(&ctx.m), 0);
+
+  return 0;
+}
+
+TEST(Posix, CondBasic) {
+  CondContext ctx;
+  EXPECT_EQ(pthread_mutex_init(&ctx.m, 0), 0);
+  EXPECT_EQ(pthread_cond_init(&ctx.c, 0), 0);
+  ctx.data = 0;
+  pthread_t th;
+  EXPECT_EQ(pthread_create(&th, 0, cond_thread, &ctx), 0);
+
+  EXPECT_EQ(pthread_mutex_lock(&ctx.m), 0);
+  while (ctx.data != 1)
+    EXPECT_EQ(pthread_cond_wait(&ctx.c, &ctx.m), 0);
+  ctx.data = 2;
+  EXPECT_EQ(pthread_mutex_unlock(&ctx.m), 0);
+  EXPECT_EQ(pthread_cond_broadcast(&ctx.c), 0);
+
+  EXPECT_EQ(pthread_mutex_lock(&ctx.m), 0);
+  while (ctx.data != 3)
+    EXPECT_EQ(pthread_cond_wait(&ctx.c, &ctx.m), 0);
+  EXPECT_EQ(pthread_mutex_unlock(&ctx.m), 0);
+
+  EXPECT_EQ(pthread_join(th, 0), 0);
+  EXPECT_EQ(pthread_cond_destroy(&ctx.c), 0);
+  EXPECT_EQ(pthread_mutex_destroy(&ctx.m), 0);
+}
diff --git a/lib/tsan/rtl_tests/tsan_string.cc b/lib/tsan/rtl_tests/tsan_string.cc
new file mode 100644
index 0000000..13b0553
--- /dev/null
+++ b/lib/tsan/rtl_tests/tsan_string.cc
@@ -0,0 +1,82 @@
+//===-- tsan_string.cc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_test_util.h"
+#include "gtest/gtest.h"
+#include <string.h>
+
+namespace __tsan {
+
+TEST(ThreadSanitizer, Memcpy) {
+  char data0[7] = {1, 2, 3, 4, 5, 6, 7};
+  char data[7] = {42, 42, 42, 42, 42, 42, 42};
+  MainThread().Memcpy(data+1, data0+1, 5);
+  EXPECT_EQ(data[0], 42);
+  EXPECT_EQ(data[1], 2);
+  EXPECT_EQ(data[2], 3);
+  EXPECT_EQ(data[3], 4);
+  EXPECT_EQ(data[4], 5);
+  EXPECT_EQ(data[5], 6);
+  EXPECT_EQ(data[6], 42);
+  MainThread().Memset(data+1, 13, 5);
+  EXPECT_EQ(data[0], 42);
+  EXPECT_EQ(data[1], 13);
+  EXPECT_EQ(data[2], 13);
+  EXPECT_EQ(data[3], 13);
+  EXPECT_EQ(data[4], 13);
+  EXPECT_EQ(data[5], 13);
+  EXPECT_EQ(data[6], 42);
+}
+
+TEST(ThreadSanitizer, MemcpyRace1) {
+  char *data = new char[10];
+  char *data1 = new char[10];
+  char *data2 = new char[10];
+  ScopedThread t1, t2;
+  t1.Memcpy(data, data1, 10);
+  t2.Memcpy(data, data2, 10, true);
+}
+
+TEST(ThreadSanitizer, MemcpyRace2) {
+  char *data = new char[10];
+  char *data1 = new char[10];
+  char *data2 = new char[10];
+  ScopedThread t1, t2;
+  t1.Memcpy(data+5, data1, 1);
+  t2.Memcpy(data+3, data2, 4, true);
+}
+
+TEST(ThreadSanitizer, MemcpyRace3) {
+  char *data = new char[10];
+  char *data1 = new char[10];
+  char *data2 = new char[10];
+  ScopedThread t1, t2;
+  t1.Memcpy(data, data1, 10);
+  t2.Memcpy(data1, data2, 10, true);
+}
+
+TEST(ThreadSanitizer, MemcpyStack) {
+  char *data = new char[10];
+  char *data1 = new char[10];
+  ScopedThread t1, t2;
+  t1.Memcpy(data, data1, 10);
+  t2.Memcpy(data, data1, 10, true);
+}
+
+TEST(ThreadSanitizer, MemsetRace1) {
+  char *data = new char[10];
+  ScopedThread t1, t2;
+  t1.Memset(data, 1, 10);
+  t2.Memset(data, 2, 10, true);
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl_tests/tsan_test.cc b/lib/tsan/rtl_tests/tsan_test.cc
new file mode 100644
index 0000000..839f7da
--- /dev/null
+++ b/lib/tsan/rtl_tests/tsan_test.cc
@@ -0,0 +1,43 @@
+//===-- tsan_test.cc --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_interface.h"
+#include "tsan_test_util.h"
+#include "gtest/gtest.h"
+
+static void foo() {}
+static void bar() {}
+
+TEST(ThreadSanitizer, FuncCall) {
+  ScopedThread t1, t2;
+  MemLoc l;
+  t1.Write1(l);
+  t2.Call(foo);
+  t2.Call(bar);
+  t2.Write1(l, true);
+  t2.Return();
+  t2.Return();
+}
+
+int main(int argc, char **argv) {
+  TestMutexBeforeInit();  // Mutexes must be usable before __tsan_init();
+  __tsan_init();
+  __tsan_func_entry(__builtin_return_address(0));
+  __tsan_func_entry((char*)&main + 1);
+
+  testing::InitGoogleTest(&argc, argv);
+  int res = RUN_ALL_TESTS();
+
+  __tsan_func_exit();
+  __tsan_func_exit();
+  return res;
+}
diff --git a/lib/tsan/rtl_tests/tsan_test_util.h b/lib/tsan/rtl_tests/tsan_test_util.h
new file mode 100644
index 0000000..483a564
--- /dev/null
+++ b/lib/tsan/rtl_tests/tsan_test_util.h
@@ -0,0 +1,122 @@
+//===-- tsan_test_util.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Test utils.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_TEST_UTIL_H
+#define TSAN_TEST_UTIL_H
+
+void TestMutexBeforeInit();
+
+// A location of memory on which a race may be detected.
+class MemLoc {
+ public:
+  explicit MemLoc(int offset_from_aligned = 0);
+  explicit MemLoc(void *const real_addr) : loc_(real_addr) { }
+  ~MemLoc();
+  void *loc() const { return loc_; }
+ private:
+  void *const loc_;
+  MemLoc(const MemLoc&);
+  void operator = (const MemLoc&);
+};
+
+class Mutex {
+ public:
+  enum Type { Normal, Spin, RW };
+
+  explicit Mutex(Type type = Normal);
+  ~Mutex();
+
+  void Init();
+  void StaticInit();  // Emulates static initialization (tsan invisible).
+  void Destroy();
+  void Lock();
+  bool TryLock();
+  void Unlock();
+  void ReadLock();
+  bool TryReadLock();
+  void ReadUnlock();
+
+ private:
+  // Placeholder for pthread_mutex_t, CRITICAL_SECTION or whatever.
+  void *mtx_[128];
+  bool alive_;  // Set by Init()/StaticInit(), cleared by Destroy().
+  const Type type_;  // Selects which underlying primitive mtx_ holds.
+
+  Mutex(const Mutex&);
+  void operator = (const Mutex&);
+};
+
+// A thread is started in CTOR and joined in DTOR.
+class ScopedThread {
+ public:
+  explicit ScopedThread(bool detached = false, bool main = false);
+  ~ScopedThread();
+  void Detach();
+
+  void Access(void *addr, bool is_write, int size, bool expect_race);
+  void Read(const MemLoc &ml, int size, bool expect_race = false) {
+    Access(ml.loc(), false, size, expect_race);
+  }
+  void Write(const MemLoc &ml, int size, bool expect_race = false) {
+    Access(ml.loc(), true, size, expect_race);
+  }
+  void Read1(const MemLoc &ml, bool expect_race = false) {
+    Read(ml, 1, expect_race); }
+  void Read2(const MemLoc &ml, bool expect_race = false) {
+    Read(ml, 2, expect_race); }
+  void Read4(const MemLoc &ml, bool expect_race = false) {
+    Read(ml, 4, expect_race); }
+  void Read8(const MemLoc &ml, bool expect_race = false) {
+    Read(ml, 8, expect_race); }
+  void Write1(const MemLoc &ml, bool expect_race = false) {
+    Write(ml, 1, expect_race); }
+  void Write2(const MemLoc &ml, bool expect_race = false) {
+    Write(ml, 2, expect_race); }
+  void Write4(const MemLoc &ml, bool expect_race = false) {
+    Write(ml, 4, expect_race); }
+  void Write8(const MemLoc &ml, bool expect_race = false) {
+    Write(ml, 8, expect_race); }
+
+  void VptrUpdate(const MemLoc &vptr, const MemLoc &new_val,
+                  bool expect_race = false);
+
+  void Call(void(*pc)());
+  void Return();
+
+  void Create(const Mutex &m);
+  void Destroy(const Mutex &m);
+  void Lock(const Mutex &m);
+  bool TryLock(const Mutex &m);
+  void Unlock(const Mutex &m);
+  void ReadLock(const Mutex &m);
+  bool TryReadLock(const Mutex &m);
+  void ReadUnlock(const Mutex &m);
+
+  void Memcpy(void *dst, const void *src, int size, bool expect_race = false);
+  void Memset(void *dst, int val, int size, bool expect_race = false);
+
+ private:
+  struct Impl;
+  Impl *impl_;
+  ScopedThread(const ScopedThread&);  // Not implemented.
+  void operator = (const ScopedThread&);  // Not implemented.
+};
+
+class MainThread : public ScopedThread {
+ public:
+  MainThread()
+    : ScopedThread(false, true) {
+  }
+};
+
+#endif  // #ifndef TSAN_TEST_UTIL_H
diff --git a/lib/tsan/rtl_tests/tsan_test_util_linux.cc b/lib/tsan/rtl_tests/tsan_test_util_linux.cc
new file mode 100644
index 0000000..10367ac
--- /dev/null
+++ b/lib/tsan/rtl_tests/tsan_test_util_linux.cc
@@ -0,0 +1,460 @@
+//===-- tsan_test_util_linux.cc ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Test utils, linux implementation.
+//===----------------------------------------------------------------------===//
+
+#include "tsan_interface.h"
+#include "tsan_test_util.h"
+#include "tsan_atomic.h"
+#include "tsan_report.h"
+
+#include "gtest/gtest.h"
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+using namespace __tsan;  // NOLINT
+
+static __thread bool expect_report;
+static __thread bool expect_report_reported;
+static __thread ReportType expect_report_type;
+
+static void *BeforeInitThread(void *param) {
+  (void)param;
+  return 0;
+}
+
+static void AtExit() {
+}
+
+void TestMutexBeforeInit() {
+  // Mutexes must be usable before __tsan_init();
+  pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
+  pthread_mutex_lock(&mtx);
+  pthread_mutex_unlock(&mtx);
+  pthread_mutex_destroy(&mtx);
+  pthread_t thr;
+  pthread_create(&thr, 0, BeforeInitThread, 0);
+  pthread_join(thr, 0);
+  atexit(AtExit);
+}
+
+namespace __tsan {
+bool OnReport(const ReportDesc *rep, bool suppressed) {
+  // A report is accepted only if the running event expects one of this type.
+  if (!expect_report) {
+    EXPECT_FALSE("Unexpected report");
+    return false;
+  }
+  if (rep->typ != expect_report_type) {
+    printf("Expected report of type %d, got type %d\n",
+           (int)expect_report_type, (int)rep->typ);
+    EXPECT_FALSE("Wrong report type");
+    return false;
+  }
+  expect_report_reported = true;
+  return true;
+}
+}
+
+static void* allocate_addr(int size, int offset_from_aligned = 0) {
+  static uintptr_t foo;
+  static atomic_uintptr_t uniq = {(uintptr_t)&foo};  // Some real address.
+  const int kAlign = 16;
+  CHECK(offset_from_aligned < kAlign);
+  size = (size + 2 * kAlign) & ~(kAlign - 1);  // Pad, then round to kAlign.
+  uintptr_t addr = atomic_fetch_add(&uniq, size, memory_order_relaxed);
+  return (void*)(addr + offset_from_aligned);  // No memory is allocated here.
+}
+
+MemLoc::MemLoc(int offset_from_aligned)
+  : loc_(allocate_addr(16, offset_from_aligned)) {
+}
+
+MemLoc::~MemLoc() {
+}
+
+Mutex::Mutex(Type type)
+  : alive_()
+  , type_(type) {
+}
+
+Mutex::~Mutex() {
+  CHECK(!alive_);
+}
+
+void Mutex::Init() {
+  CHECK(!alive_);
+  alive_ = true;
+  if (type_ == Normal)
+    CHECK_EQ(pthread_mutex_init((pthread_mutex_t*)mtx_, 0), 0);
+  else if (type_ == Spin)
+    CHECK_EQ(pthread_spin_init((pthread_spinlock_t*)mtx_, 0), 0);
+  else if (type_ == RW)
+    CHECK_EQ(pthread_rwlock_init((pthread_rwlock_t*)mtx_, 0), 0);
+  else
+    CHECK(0);
+}
+
+void Mutex::StaticInit() {
+  CHECK(!alive_);
+  CHECK(type_ == Normal);
+  alive_ = true;
+  pthread_mutex_t tmp = PTHREAD_MUTEX_INITIALIZER;
+  memcpy(mtx_, &tmp, sizeof(tmp));
+}
+
+void Mutex::Destroy() {
+  CHECK(alive_);
+  alive_ = false;
+  if (type_ == Normal)
+    CHECK_EQ(pthread_mutex_destroy((pthread_mutex_t*)mtx_), 0);
+  else if (type_ == Spin)
+    CHECK_EQ(pthread_spin_destroy((pthread_spinlock_t*)mtx_), 0);
+  else if (type_ == RW)
+    CHECK_EQ(pthread_rwlock_destroy((pthread_rwlock_t*)mtx_), 0);
+}
+
+void Mutex::Lock() {
+  CHECK(alive_);
+  if (type_ == Normal)
+    CHECK_EQ(pthread_mutex_lock((pthread_mutex_t*)mtx_), 0);
+  else if (type_ == Spin)
+    CHECK_EQ(pthread_spin_lock((pthread_spinlock_t*)mtx_), 0);
+  else if (type_ == RW)
+    CHECK_EQ(pthread_rwlock_wrlock((pthread_rwlock_t*)mtx_), 0);
+}
+
+bool Mutex::TryLock() {
+  CHECK(alive_);
+  // Non-blocking exclusive acquisition; true means the call returned 0.
+  switch (type_) {
+    case Normal: return pthread_mutex_trylock((pthread_mutex_t*)mtx_) == 0;
+    case Spin: return pthread_spin_trylock((pthread_spinlock_t*)mtx_) == 0;
+    case RW: return pthread_rwlock_trywrlock((pthread_rwlock_t*)mtx_) == 0;
+  }
+  return false;
+}
+
+void Mutex::Unlock() {
+  CHECK(alive_);
+  if (type_ == Normal)
+    CHECK_EQ(pthread_mutex_unlock((pthread_mutex_t*)mtx_), 0);
+  else if (type_ == Spin)
+    CHECK_EQ(pthread_spin_unlock((pthread_spinlock_t*)mtx_), 0);
+  else if (type_ == RW)
+    CHECK_EQ(pthread_rwlock_unlock((pthread_rwlock_t*)mtx_), 0);
+}
+
+void Mutex::ReadLock() {
+  CHECK(alive_);
+  CHECK(type_ == RW);
+  CHECK_EQ(pthread_rwlock_rdlock((pthread_rwlock_t*)mtx_), 0);
+}
+
+bool Mutex::TryReadLock() {
+  CHECK(alive_);
+  CHECK(type_ == RW);
+  return pthread_rwlock_tryrdlock((pthread_rwlock_t*)mtx_) ==  0;
+}
+
+void Mutex::ReadUnlock() {
+  CHECK(alive_);
+  CHECK(type_ == RW);
+  CHECK_EQ(pthread_rwlock_unlock((pthread_rwlock_t*)mtx_), 0);
+}
+
+struct Event {
+  enum Type {
+    SHUTDOWN,
+    READ,
+    WRITE,
+    VPTR_UPDATE,
+    CALL,
+    RETURN,
+    MUTEX_CREATE,
+    MUTEX_DESTROY,
+    MUTEX_LOCK,
+    MUTEX_TRYLOCK,
+    MUTEX_UNLOCK,
+    MUTEX_READLOCK,
+    MUTEX_TRYREADLOCK,
+    MUTEX_READUNLOCK,
+    MEMCPY,
+    MEMSET
+  };
+  Type type;
+  void *ptr;
+  uptr arg;
+  uptr arg2;
+  bool res;
+  bool expect_report;
+  ReportType report_type;
+
+  Event(Type type, const void *ptr = 0, uptr arg = 0, uptr arg2 = 0)
+    : type(type)
+    , ptr(const_cast<void*>(ptr))
+    , arg(arg)
+    , arg2(arg2)
+    , res()
+    , expect_report()
+    , report_type() {
+  }
+
+  void ExpectReport(ReportType type) {
+    expect_report = true;
+    report_type = type;
+  }
+};
+
+struct ScopedThread::Impl {
+  pthread_t thread;
+  bool main;
+  bool detached;
+  atomic_uintptr_t event;  // Event*
+
+  static void *ScopedThreadCallback(void *arg);
+  void send(Event *ev);
+  void HandleEvent(Event *ev);
+};
+
+void ScopedThread::Impl::HandleEvent(Event *ev) {
+  CHECK_EQ(expect_report, false);  // No stale expectation may be pending.
+  expect_report = ev->expect_report;
+  expect_report_reported = false;
+  expect_report_type = ev->report_type;
+  switch (ev->type) {
+  case Event::READ:
+  case Event::WRITE: {
+    void (*tsan_mop)(void *addr) = 0;  // Chosen by access kind and size.
+    if (ev->type == Event::READ) {
+      switch (ev->arg /*size*/) {
+        case 1: tsan_mop = __tsan_read1; break;
+        case 2: tsan_mop = __tsan_read2; break;
+        case 4: tsan_mop = __tsan_read4; break;
+        case 8: tsan_mop = __tsan_read8; break;
+        case 16: tsan_mop = __tsan_read16; break;
+      }
+    } else {
+      switch (ev->arg /*size*/) {
+        case 1: tsan_mop = __tsan_write1; break;
+        case 2: tsan_mop = __tsan_write2; break;
+        case 4: tsan_mop = __tsan_write4; break;
+        case 8: tsan_mop = __tsan_write8; break;
+        case 16: tsan_mop = __tsan_write16; break;
+      }
+    }
+    CHECK_NE(tsan_mop, 0);  // Fires for an unsupported access size.
+    errno = ECHRNG;  // Sentinel for the errno-preservation check below.
+    tsan_mop(ev->ptr);
+    CHECK_EQ(errno, ECHRNG);  // In no case must errno be changed.
+    break;
+  }
+  case Event::VPTR_UPDATE:
+    __tsan_vptr_update((void**)ev->ptr, (void*)ev->arg);
+    break;
+  case Event::CALL:
+    __tsan_func_entry((void*)((uptr)ev->ptr));
+    break;
+  case Event::RETURN:
+    __tsan_func_exit();
+    break;
+  case Event::MUTEX_CREATE:
+    static_cast<Mutex*>(ev->ptr)->Init();
+    break;
+  case Event::MUTEX_DESTROY:
+    static_cast<Mutex*>(ev->ptr)->Destroy();
+    break;
+  case Event::MUTEX_LOCK:
+    static_cast<Mutex*>(ev->ptr)->Lock();
+    break;
+  case Event::MUTEX_TRYLOCK:
+    ev->res = static_cast<Mutex*>(ev->ptr)->TryLock();
+    break;
+  case Event::MUTEX_UNLOCK:
+    static_cast<Mutex*>(ev->ptr)->Unlock();
+    break;
+  case Event::MUTEX_READLOCK:
+    static_cast<Mutex*>(ev->ptr)->ReadLock();
+    break;
+  case Event::MUTEX_TRYREADLOCK:
+    ev->res = static_cast<Mutex*>(ev->ptr)->TryReadLock();
+    break;
+  case Event::MUTEX_READUNLOCK:
+    static_cast<Mutex*>(ev->ptr)->ReadUnlock();
+    break;
+  case Event::MEMCPY:
+    memcpy(ev->ptr, (void*)ev->arg, ev->arg2);  // arg = source, arg2 = size.
+    break;
+  case Event::MEMSET:
+    memset(ev->ptr, ev->arg, ev->arg2);  // arg = fill value, arg2 = size.
+    break;
+  default: CHECK(0);  // Unhandled event type.
+  }
+  if (expect_report && !expect_report_reported) {
+    printf("Missed expected report of type %d\n", (int)ev->report_type);
+    EXPECT_FALSE("Missed expected race");  // Non-null literal: always fails.
+  }
+  expect_report = false;  // Satisfies the CHECK_EQ on the next call.
+}
+
+void *ScopedThread::Impl::ScopedThreadCallback(void *arg) {
+  __tsan_func_entry(__builtin_return_address(0));
+  Impl *self = static_cast<Impl*>(arg);
+  // Poll the single event slot until a SHUTDOWN event has been consumed.
+  for (bool stop = false; !stop;) {
+    Event *pending = (Event*)atomic_load(&self->event, memory_order_acquire);
+    if (!pending) {  // Nothing posted yet.
+      pthread_yield();
+      continue;
+    }
+    stop = (pending->type == Event::SHUTDOWN);
+    if (!stop)
+      self->HandleEvent(pending);
+    // Zeroing the slot is what send() waits for, so it must come last.
+    atomic_store(&self->event, 0, memory_order_release);
+  }
+  __tsan_func_exit();
+  return 0;
+}
+
+void ScopedThread::Impl::send(Event *e) {
+  if (main) {  // Main-thread proxy: run the event inline, nothing to wait for.
+    HandleEvent(e);
+    return;
+  }
+  CHECK_EQ(atomic_load(&event, memory_order_relaxed), 0);  // Slot is free.
+  atomic_store(&event, (uintptr_t)e, memory_order_release);
+  while (atomic_load(&event, memory_order_acquire) != 0)  // Cleared when done.
+    pthread_yield();
+}
+
+ScopedThread::ScopedThread(bool detached, bool main) {
+  impl_ = new Impl;
+  impl_->main = main;
+  impl_->detached = detached;
+  atomic_store(&impl_->event, 0, memory_order_relaxed);
+  if (main) return;  // Main-thread proxy runs events inline; no worker needed.
+  pthread_attr_t attr;
+  pthread_attr_init(&attr);
+  pthread_attr_setdetachstate(  // Needs a PTHREAD_CREATE_* value, not a bool.
+      &attr, detached ? PTHREAD_CREATE_DETACHED : PTHREAD_CREATE_JOINABLE);
+  pthread_create(&impl_->thread, &attr, Impl::ScopedThreadCallback, impl_);
+  pthread_attr_destroy(&attr);
+}
+
+ScopedThread::~ScopedThread() {
+  if (!impl_->main) {  // Only non-main instances own a worker thread.
+    Event event(Event::SHUTDOWN);
+    impl_->send(&event);  // Returns after the worker clears the event slot.
+    if (!impl_->detached)
+      pthread_join(impl_->thread, 0);
+  }
+  delete impl_;
+}
+
+void ScopedThread::Detach() {
+  CHECK(!impl_->main);  // The main-thread proxy has no worker to detach.
+  CHECK(!impl_->detached);  // Must not already be detached.
+  impl_->detached = true;  // Makes the destructor skip pthread_join().
+  pthread_detach(impl_->thread);
+}
+
+void ScopedThread::Access(void *addr, bool is_write,
+                          int size, bool expect_race) {
+  Event event(is_write ? Event::WRITE : Event::READ, addr, size);
+  if (expect_race)
+    event.ExpectReport(ReportTypeRace);
+  impl_->send(&event);
+}
+
+void ScopedThread::VptrUpdate(const MemLoc &vptr,
+                              const MemLoc &new_val,
+                              bool expect_race) {
+  Event event(Event::VPTR_UPDATE, vptr.loc(), (uptr)new_val.loc());
+  if (expect_race)
+    event.ExpectReport(ReportTypeRace);
+  impl_->send(&event);
+}
+
+void ScopedThread::Call(void(*pc)()) {
+  Event event(Event::CALL, (void*)pc);
+  impl_->send(&event);
+}
+
+void ScopedThread::Return() {
+  Event event(Event::RETURN);
+  impl_->send(&event);
+}
+
+void ScopedThread::Create(const Mutex &m) {
+  Event event(Event::MUTEX_CREATE, &m);
+  impl_->send(&event);
+}
+
+void ScopedThread::Destroy(const Mutex &m) {
+  Event event(Event::MUTEX_DESTROY, &m);
+  impl_->send(&event);
+}
+
+void ScopedThread::Lock(const Mutex &m) {
+  Event event(Event::MUTEX_LOCK, &m);
+  impl_->send(&event);
+}
+
+bool ScopedThread::TryLock(const Mutex &m) {
+  Event event(Event::MUTEX_TRYLOCK, &m);
+  impl_->send(&event);
+  return event.res;
+}
+
+void ScopedThread::Unlock(const Mutex &m) {
+  Event event(Event::MUTEX_UNLOCK, &m);
+  impl_->send(&event);
+}
+
+void ScopedThread::ReadLock(const Mutex &m) {
+  Event event(Event::MUTEX_READLOCK, &m);
+  impl_->send(&event);
+}
+
+bool ScopedThread::TryReadLock(const Mutex &m) {
+  Event event(Event::MUTEX_TRYREADLOCK, &m);
+  impl_->send(&event);
+  return event.res;
+}
+
+void ScopedThread::ReadUnlock(const Mutex &m) {
+  Event event(Event::MUTEX_READUNLOCK, &m);
+  impl_->send(&event);
+}
+
+void ScopedThread::Memcpy(void *dst, const void *src, int size,
+                          bool expect_race) {
+  Event event(Event::MEMCPY, dst, (uptr)src, size);
+  if (expect_race)
+    event.ExpectReport(ReportTypeRace);
+  impl_->send(&event);
+}
+
+void ScopedThread::Memset(void *dst, int val, int size,
+                          bool expect_race) {
+  Event event(Event::MEMSET, dst, val, size);
+  if (expect_race)
+    event.ExpectReport(ReportTypeRace);
+  impl_->send(&event);
+}
diff --git a/lib/tsan/rtl_tests/tsan_thread.cc b/lib/tsan/rtl_tests/tsan_thread.cc
new file mode 100644
index 0000000..4ee7c5f
--- /dev/null
+++ b/lib/tsan/rtl_tests/tsan_thread.cc
@@ -0,0 +1,59 @@
+//===-- tsan_thread.cc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_test_util.h"
+#include "gtest/gtest.h"
+
+TEST(ThreadSanitizer, ThreadSync) {
+  MainThread t0;
+  MemLoc l;
+  t0.Write1(l);
+  {
+    ScopedThread t1;
+    t1.Write1(l);
+  }
+  t0.Write1(l);
+}
+
+TEST(ThreadSanitizer, ThreadDetach1) {
+  ScopedThread t1(true);
+  MemLoc l;
+  t1.Write1(l);
+}
+
+TEST(ThreadSanitizer, ThreadDetach2) {
+  ScopedThread t1;
+  MemLoc l;
+  t1.Write1(l);
+  t1.Detach();
+}
+
+static void *thread_alot_func(void *arg) {
+  (void)arg;  // Unused.
+  int usleep(unsigned);  // Local declaration instead of including <unistd.h>.
+  usleep(50);
+  return 0;
+}
+
+TEST(DISABLED_SLOW_ThreadSanitizer, ThreadALot) {
+  const int kThreads = 70000;
+  const int kAlive = 1000;
+  pthread_t threads[kAlive] = {};
+  for (int n = 0; n < kThreads; n++) {  // At most kAlive threads unjoined.
+    pthread_t *slot = &threads[n % kAlive];
+    if (*slot)  // Slot is being reused: reap its previous occupant first.
+      pthread_join(*slot, 0);
+    pthread_create(slot, 0, thread_alot_func, 0);
+  }
+  for (pthread_t *t = threads; t != threads + kAlive; t++)
+    pthread_join(*t, 0);
+}
diff --git a/lib/tsan/unit_tests/tsan_allocator_test.cc b/lib/tsan/unit_tests/tsan_allocator_test.cc
new file mode 100644
index 0000000..e9d0963
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_allocator_test.cc
@@ -0,0 +1,56 @@
+//===-- tsan_allocator_test.cc ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_allocator.h"
+#include "gtest/gtest.h"
+#include <stdlib.h>
+
+namespace __tsan {
+
+TEST(Allocator, Basic) {
+  char *p = (char*)Alloc(10);
+  EXPECT_NE(p, (char*)0);
+  char *p2 = (char*)Alloc(20);
+  EXPECT_NE(p2, (char*)0);
+  EXPECT_NE(p2, p);
+  for (int i = 0; i < 10; i++) {
+    p[i] = 42;
+    EXPECT_EQ(p, AllocBlock(p + i));
+  }
+  for (int i = 0; i < 20; i++) {
+    ((char*)p2)[i] = 42;
+    EXPECT_EQ(p2, AllocBlock(p2 + i));
+  }
+  Free(p);
+  Free(p2);
+}
+
+TEST(Allocator, Stress) {
+  const int kCount = 1000;
+  char *ptrs[kCount];
+  unsigned rnd = 42;  // Fixed seed for rand_r().
+  for (int i = 0; i < kCount; i++) {
+    uptr sz = rand_r(&rnd) % 1000;  // May be 0; Alloc(0) is expected non-null.
+    char *p = (char*)Alloc(sz);
+    EXPECT_NE(p, (char*)0);
+    for (uptr j = 0; j < sz; j++) {
+      p[j] = 42;
+      EXPECT_EQ(p, AllocBlock(p + j));  // Interior pointers map to the block.
+    }
+    ptrs[i] = p;
+  }
+  for (int i = 0; i < kCount; i++) {  // All blocks stay live until here.
+    Free(ptrs[i]);
+  }
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_clock_test.cc b/lib/tsan/unit_tests/tsan_clock_test.cc
new file mode 100644
index 0000000..9c35fb5
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_clock_test.cc
@@ -0,0 +1,123 @@
+//===-- tsan_clock_test.cc --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_clock.h"
+#include "tsan_rtl.h"
+#include "gtest/gtest.h"
+
+namespace __tsan {
+
+TEST(Clock, VectorBasic) {
+  ScopedInRtl in_rtl;
+  ThreadClock clk;
+  CHECK_EQ(clk.size(), 0);
+  clk.tick(0);
+  CHECK_EQ(clk.size(), 1);
+  CHECK_EQ(clk.get(0), 1);
+  clk.tick(3);
+  CHECK_EQ(clk.size(), 4);
+  CHECK_EQ(clk.get(0), 1);
+  CHECK_EQ(clk.get(1), 0);
+  CHECK_EQ(clk.get(2), 0);
+  CHECK_EQ(clk.get(3), 1);
+  clk.tick(3);
+  CHECK_EQ(clk.get(3), 2);
+}
+
+TEST(Clock, ChunkedBasic) {
+  ScopedInRtl in_rtl;
+  ThreadClock vector;
+  SyncClock chunked;
+  CHECK_EQ(vector.size(), 0);
+  CHECK_EQ(chunked.size(), 0);
+  vector.acquire(&chunked);
+  CHECK_EQ(vector.size(), 0);
+  CHECK_EQ(chunked.size(), 0);
+  vector.release(&chunked);
+  CHECK_EQ(vector.size(), 0);
+  CHECK_EQ(chunked.size(), 0);
+  vector.acq_rel(&chunked);
+  CHECK_EQ(vector.size(), 0);
+  CHECK_EQ(chunked.size(), 0);
+}
+
+TEST(Clock, AcquireRelease) {
+  ScopedInRtl in_rtl;
+  ThreadClock vector1;
+  vector1.tick(100);
+  SyncClock chunked;
+  vector1.release(&chunked);
+  CHECK_EQ(chunked.size(), 101);
+  ThreadClock vector2;
+  vector2.acquire(&chunked);
+  CHECK_EQ(vector2.size(), 101);
+  CHECK_EQ(vector2.get(0), 0);
+  CHECK_EQ(vector2.get(1), 0);
+  CHECK_EQ(vector2.get(99), 0);
+  CHECK_EQ(vector2.get(100), 1);
+}
+
+TEST(Clock, ManyThreads) {
+  ScopedInRtl in_rtl;
+  SyncClock chunked;
+  for (int i = 0; i < 100; i++) {
+    ThreadClock vector;
+    vector.tick(i);
+    vector.release(&chunked);
+    CHECK_EQ(chunked.size(), i + 1);
+    vector.acquire(&chunked);
+    CHECK_EQ(vector.size(), i + 1);
+  }
+  ThreadClock vector;
+  vector.acquire(&chunked);
+  CHECK_EQ(vector.size(), 100);
+  for (int i = 0; i < 100; i++)
+    CHECK_EQ(vector.get(i), 1);
+}
+
+TEST(Clock, DifferentSizes) {
+  ScopedInRtl in_rtl;
+  {
+    ThreadClock vector1;
+    vector1.tick(10);
+    ThreadClock vector2;
+    vector2.tick(20);
+    {
+      SyncClock chunked;
+      vector1.release(&chunked);
+      CHECK_EQ(chunked.size(), 11);
+      vector2.release(&chunked);
+      CHECK_EQ(chunked.size(), 21);
+    }
+    {
+      SyncClock chunked;
+      vector2.release(&chunked);
+      CHECK_EQ(chunked.size(), 21);
+      vector1.release(&chunked);
+      CHECK_EQ(chunked.size(), 21);
+    }
+    {
+      SyncClock chunked;
+      vector1.release(&chunked);
+      vector2.acquire(&chunked);
+      CHECK_EQ(vector2.size(), 21);
+    }
+    {
+      SyncClock chunked;
+      vector2.release(&chunked);
+      vector1.acquire(&chunked);
+      CHECK_EQ(vector1.size(), 21);
+    }
+  }
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_flags_test.cc b/lib/tsan/unit_tests/tsan_flags_test.cc
new file mode 100644
index 0000000..2e63011
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_flags_test.cc
@@ -0,0 +1,107 @@
+//===-- tsan_flags_test.cc --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_flags.h"
+#include "tsan_rtl.h"
+#include "gtest/gtest.h"
+
+namespace __tsan {
+
+TEST(Flags, Basic) {
+  ScopedInRtl in_rtl;
+  // At least should not crash.
+  Flags f = {};
+  InitializeFlags(&f, 0);
+  InitializeFlags(&f, "");
+}
+
+TEST(Flags, ParseBool) {
+  ScopedInRtl in_rtl;
+  Flags f = {};
+
+  f.enable_annotations = false;
+  InitializeFlags(&f, "enable_annotations");
+  EXPECT_EQ(f.enable_annotations, true);
+
+  f.enable_annotations = false;
+  InitializeFlags(&f, "--enable_annotations");
+  EXPECT_EQ(f.enable_annotations, true);
+
+  f.enable_annotations = false;
+  InitializeFlags(&f, "--enable_annotations=1");
+  EXPECT_EQ(f.enable_annotations, true);
+
+  // This flag is false by default.
+  f.force_seq_cst_atomics = false;
+  InitializeFlags(&f, "--force_seq_cst_atomics=1");
+  EXPECT_EQ(f.force_seq_cst_atomics, true);
+
+  f.enable_annotations = true;
+  InitializeFlags(&f, "asdas enable_annotations=0 asdasd");
+  EXPECT_EQ(f.enable_annotations, false);
+
+  f.enable_annotations = true;
+  InitializeFlags(&f, "   --enable_annotations=0   ");
+  EXPECT_EQ(f.enable_annotations, false);
+}
+
+TEST(Flags, ParseInt) {
+  ScopedInRtl in_rtl;
+  Flags f = {};
+
+  f.exitcode = -11;  // Sentinel, so an untouched field is detectable.
+  InitializeFlags(&f, "exitcode");  // Bare name, no value.
+  EXPECT_EQ(f.exitcode, 0);
+
+  f.exitcode = -11;
+  InitializeFlags(&f, "--exitcode=");  // Empty value.
+  EXPECT_EQ(f.exitcode, 0);
+
+  f.exitcode = -11;
+  InitializeFlags(&f, "--exitcode=42");
+  EXPECT_EQ(f.exitcode, 42);
+
+  f.exitcode = -11;
+  InitializeFlags(&f, "--exitcode=-42");  // Negative values are accepted.
+  EXPECT_EQ(f.exitcode, -42);
+}
+
+TEST(Flags, ParseStr) {
+  ScopedInRtl in_rtl;
+  Flags f = {};
+
+  InitializeFlags(&f, 0);
+  EXPECT_EQ(0, strcmp(f.strip_path_prefix, ""));
+  FinalizeFlags(&f);
+
+  InitializeFlags(&f, "strip_path_prefix");
+  EXPECT_EQ(0, strcmp(f.strip_path_prefix, ""));
+  FinalizeFlags(&f);
+
+  InitializeFlags(&f, "--strip_path_prefix=");
+  EXPECT_EQ(0, strcmp(f.strip_path_prefix, ""));
+  FinalizeFlags(&f);
+
+  InitializeFlags(&f, "--strip_path_prefix=abc");
+  EXPECT_EQ(0, strcmp(f.strip_path_prefix, "abc"));
+  FinalizeFlags(&f);
+
+  InitializeFlags(&f, "--strip_path_prefix='abc zxc'");
+  EXPECT_EQ(0, strcmp(f.strip_path_prefix, "abc zxc"));
+  FinalizeFlags(&f);
+
+  InitializeFlags(&f, "--strip_path_prefix=\"abc zxc\"");
+  EXPECT_EQ(0, strcmp(f.strip_path_prefix, "abc zxc"));
+  FinalizeFlags(&f);
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_mman_test.cc b/lib/tsan/unit_tests/tsan_mman_test.cc
new file mode 100644
index 0000000..5af803b
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_mman_test.cc
@@ -0,0 +1,109 @@
+//===-- tsan_mman_test.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_mman.h"
+#include "tsan_rtl.h"
+#include "gtest/gtest.h"
+
+namespace __tsan {
+
+TEST(Mman, Internal) {
+  ScopedInRtl in_rtl;
+  char *p = (char*)internal_alloc(MBlockScopedBuf, 10);
+  EXPECT_NE(p, (char*)0);
+  char *p2 = (char*)internal_alloc(MBlockScopedBuf, 20);
+  EXPECT_NE(p2, (char*)0);
+  EXPECT_NE(p2, p);
+  for (int i = 0; i < 10; i++) {
+    p[i] = 42;
+  }
+  for (int i = 0; i < 20; i++) {
+    ((char*)p2)[i] = 42;
+  }
+  internal_free(p);
+  internal_free(p2);
+}
+
+TEST(Mman, User) {
+  ScopedInRtl in_rtl;
+  ThreadState *thr = cur_thread();
+  uptr pc = 0;
+  char *p = (char*)user_alloc(thr, pc, 10);
+  EXPECT_NE(p, (char*)0);
+  char *p2 = (char*)user_alloc(thr, pc, 20);
+  EXPECT_NE(p2, (char*)0);
+  EXPECT_NE(p2, p);
+  MBlock *b = user_mblock(thr, p);
+  EXPECT_NE(b, (MBlock*)0);
+  EXPECT_EQ(b->size, (uptr)10);
+  MBlock *b2 = user_mblock(thr, p2);
+  EXPECT_NE(b2, (MBlock*)0);
+  EXPECT_EQ(b2->size, (uptr)20);
+  for (int i = 0; i < 10; i++) {
+    p[i] = 42;
+    EXPECT_EQ(b, user_mblock(thr, p + i));
+  }
+  for (int i = 0; i < 20; i++) {
+    ((char*)p2)[i] = 42;
+    EXPECT_EQ(b2, user_mblock(thr, p2 + i));
+  }
+  user_free(thr, pc, p);
+  user_free(thr, pc, p2);
+}
+
+TEST(Mman, UserRealloc) {
+  ScopedInRtl in_rtl;
+  ThreadState *thr = cur_thread();
+  uptr pc = 0;
+  {
+    void *p = user_realloc(thr, pc, 0, 0);
+    // Strictly speaking this is incorrect: realloc(NULL, N) is equivalent to
+    // malloc(N), and thus must return a non-NULL pointer.
+    EXPECT_EQ(p, (void*)0);
+  }
+  {
+    void *p = user_realloc(thr, pc, 0, 100);
+    EXPECT_NE(p, (void*)0);
+    memset(p, 0xde, 100);
+    user_free(thr, pc, p);
+  }
+  {
+    void *p = user_alloc(thr, pc, 100);
+    EXPECT_NE(p, (void*)0);
+    memset(p, 0xde, 100);
+    void *p2 = user_realloc(thr, pc, p, 0);
+    EXPECT_EQ(p2, (void*)0);
+  }
+  {
+    void *p = user_realloc(thr, pc, 0, 100);
+    EXPECT_NE(p, (void*)0);
+    memset(p, 0xde, 100);
+    void *p2 = user_realloc(thr, pc, p, 10000);
+    EXPECT_NE(p2, (void*)0);
+    for (int i = 0; i < 100; i++)
+      EXPECT_EQ(((char*)p2)[i], (char)0xde);
+    memset(p2, 0xde, 10000);
+    user_free(thr, pc, p2);
+  }
+  {
+    void *p = user_realloc(thr, pc, 0, 10000);
+    EXPECT_NE(p, (void*)0);
+    memset(p, 0xde, 10000);
+    void *p2 = user_realloc(thr, pc, p, 10);
+    EXPECT_NE(p2, (void*)0);
+    for (int i = 0; i < 10; i++)
+      EXPECT_EQ(((char*)p2)[i], (char)0xde);
+    user_free(thr, pc, p2);
+  }
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_mutex_test.cc b/lib/tsan/unit_tests/tsan_mutex_test.cc
new file mode 100644
index 0000000..77c0cda
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_mutex_test.cc
@@ -0,0 +1,101 @@
+//===-- tsan_mutex_test.cc --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_atomic.h"
+#include "tsan_mutex.h"
+#include "gtest/gtest.h"
+
+namespace __tsan {
+
+class TestData {  // kSize counters that must stay equal while mtx_ is held.
+ public:
+  TestData()
+    : mtx_(MutexTypeAnnotations, StatMtxAnnotations) {
+    for (int i = 0; i < kSize; i++)
+      data_[i] = 0;
+  }
+
+  void Write() {  // Checks all cells agree, then bumps each one by one.
+    Lock l(&mtx_);
+    T v0 = data_[0];
+    for (int i = 0; i < kSize; i++) {
+      CHECK_EQ(data_[i], v0);  // A mismatch means mutual exclusion was broken.
+      data_[i]++;
+    }
+  }
+
+  void Read() {  // Same consistency check as Write(), without modification.
+    ReadLock l(&mtx_);
+    T v0 = data_[0];
+    for (int i = 0; i < kSize; i++) {
+      CHECK_EQ(data_[i], v0);
+    }
+  }
+
+ private:
+  static const int kSize = 64;
+  typedef u64 T;
+  Mutex mtx_;
+  char pad_[kCacheLineSize];  // Presumably separates mtx_ from data_.
+  T data_[kSize];
+};
+
+const int kThreads = 8;
+const int kWriteRate = 1024;
+#if TSAN_DEBUG
+const int kIters = 16*1024;
+#else
+const int kIters = 64*1024;
+#endif
+
+static void *write_mutex_thread(void *param) {
+  TestData *shared = static_cast<TestData *>(param);
+  TestData uncontended;  // Thread-private instance, locked without contention.
+  for (int left = kIters; left > 0; left--) {
+    shared->Write();
+    uncontended.Write();
+  }
+  return 0;
+}
+
+static void *read_mutex_thread(void *param) {
+  TestData *shared = static_cast<TestData *>(param);
+  TestData uncontended;  // Thread-private instance; never shared.
+  for (int iter = 0; iter != kIters; ++iter) {  // One write per kWriteRate.
+    if (iter % kWriteRate != 0)
+      shared->Read();
+    else
+      shared->Write();
+    uncontended.Write();
+  }
+  return 0;
+}
+
+TEST(Mutex, Write) {
+  TestData data;
+  pthread_t threads[kThreads];
+  for (int i = 0; i < kThreads; i++)
+    pthread_create(&threads[i], 0, write_mutex_thread, &data);
+  for (int i = 0; i < kThreads; i++)
+    pthread_join(threads[i], 0);
+}
+
+TEST(Mutex, ReadWrite) {
+  TestData data;
+  pthread_t threads[kThreads];
+  for (int i = 0; i < kThreads; i++)
+    pthread_create(&threads[i], 0, read_mutex_thread, &data);
+  for (int i = 0; i < kThreads; i++)
+    pthread_join(threads[i], 0);
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_platform_test.cc b/lib/tsan/unit_tests/tsan_platform_test.cc
new file mode 100644
index 0000000..16ee6bd
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_platform_test.cc
@@ -0,0 +1,83 @@
+//===-- tsan_platform_test.cc -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_platform.h"
+#include "gtest/gtest.h"
+
+namespace __tsan {
+
+static void *TestThreadInfo(void *arg) {
+  ScopedInRtl in_rtl;
+  uptr stk_addr = 0;
+  uptr stk_size = 0;
+  uptr tls_addr = 0;
+  uptr tls_size = 0;
+  GetThreadStackAndTls(&stk_addr, &stk_size, &tls_addr, &tls_size);
+  // Printf("stk=%lx-%lx(%lu)\n", stk_addr, stk_addr + stk_size, stk_size);
+  // Printf("tls=%lx-%lx(%lu)\n", tls_addr, tls_addr + tls_size, tls_size);
+
+  int stack_var;
+  EXPECT_NE(stk_addr, (uptr)0);
+  EXPECT_NE(stk_size, (uptr)0);
+  EXPECT_GT((uptr)&stack_var, stk_addr);
+  EXPECT_LT((uptr)&stack_var, stk_addr + stk_size);
+
+  static __thread int thread_var;
+  EXPECT_NE(tls_addr, (uptr)0);
+  EXPECT_NE(tls_size, (uptr)0);
+  EXPECT_GT((uptr)&thread_var, tls_addr);
+  EXPECT_LT((uptr)&thread_var, tls_addr + tls_size);
+
+  // Ensure that tls and stack do not intersect.
+  uptr tls_end = tls_addr + tls_size;
+  EXPECT_TRUE(tls_addr < stk_addr || tls_addr >= stk_addr + stk_size);
+  EXPECT_TRUE(tls_end  < stk_addr || tls_end  >=  stk_addr + stk_size);
+  EXPECT_TRUE((tls_addr < stk_addr) == (tls_end  < stk_addr));
+  return 0;
+}
+
+TEST(Platform, ThreadInfoMain) {
+  TestThreadInfo(0);
+}
+
+TEST(Platform, ThreadInfoWorker) {
+  pthread_t t;
+  pthread_create(&t, 0, TestThreadInfo, 0);
+  pthread_join(t, 0);
+}
+
+TEST(Platform, FileOps) {
+  const char *str1 = "qwerty";
+  uptr len1 = internal_strlen(str1);
+  const char *str2 = "zxcv";
+  uptr len2 = internal_strlen(str2);
+  // Write str1 and str2 back to back, then reopen and read them separately.
+  fd_t fd = internal_open("./tsan_test.tmp", true);  // Presumably write mode.
+  EXPECT_NE(fd, kInvalidFd);
+  EXPECT_EQ(len1, internal_write(fd, str1, len1));
+  EXPECT_EQ(len2, internal_write(fd, str2, len2));
+  internal_close(fd);
+
+  fd = internal_open("./tsan_test.tmp", false);
+  EXPECT_NE(fd, kInvalidFd);
+  EXPECT_EQ(len1 + len2, internal_filesize(fd));
+  char buf[64] = {};
+  EXPECT_EQ(len1, internal_read(fd, buf, len1));
+  EXPECT_EQ(0, internal_memcmp(buf, str1, len1));
+  EXPECT_EQ((char)0, buf[len1]);  // First byte past the read must stay zero.
+  internal_memset(buf, 0, len1);
+  EXPECT_EQ(len2, internal_read(fd, buf, len2));  // Continues after str1.
+  EXPECT_EQ(0, internal_memcmp(buf, str2, len2));
+  internal_close(fd);
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_printf_test.cc b/lib/tsan/unit_tests/tsan_printf_test.cc
new file mode 100644
index 0000000..1aed141
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_printf_test.cc
@@ -0,0 +1,109 @@
+//===-- tsan_printf_test.cc -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_rtl.h"
+#include "gtest/gtest.h"
+
+#include <string.h>
+#include <limits.h>
+
+namespace __tsan {
+
+TEST(Printf, Basic) {
+  char buf[1024];
+  uptr len = Snprintf(buf, sizeof(buf),
+      "a%db%ldc%lldd%ue%luf%llug%xh%lxq%llxw%pe%sr",
+      (int)-1, (long)-2, (long long)-3,  // NOLINT
+      (unsigned)-4, (unsigned long)5, (unsigned long long)6,  // NOLINT
+      (unsigned)10, (unsigned long)11, (unsigned long long)12,  // NOLINT
+      (void*)0x123, "_string_");
+  EXPECT_EQ(len, strlen(buf));
+  EXPECT_EQ(0, strcmp(buf, "a-1b-2c-3d4294967292e5f6gahbqcw"
+                           "0x000000000123e_string_r"));
+}
+
+TEST(Printf, OverflowStr) {
+  char buf[] = "123456789";
+  uptr len = Snprintf(buf, 4, "%s", "abcdef");  // Capacity 4 incl. NUL.
+  EXPECT_EQ(len, (uptr)6);  // Reports the untruncated length.
+  EXPECT_EQ(0, strcmp(buf, "abc"));
+  EXPECT_EQ(buf[3], 0);
+  EXPECT_EQ(buf[4], '5');  // Bytes past the limit are untouched.
+  EXPECT_EQ(buf[5], '6');
+  EXPECT_EQ(buf[6], '7');
+  EXPECT_EQ(buf[7], '8');
+  EXPECT_EQ(buf[8], '9');
+  EXPECT_EQ(buf[9], 0);
+}
+
+TEST(Printf, OverflowInt) {
+  char buf[] = "123456789";
+  Snprintf(buf, 4, "%d", -123456789);
+  EXPECT_EQ(0, strcmp(buf, "-12"));
+  EXPECT_EQ(buf[3], 0);
+  EXPECT_EQ(buf[4], '5');
+  EXPECT_EQ(buf[5], '6');
+  EXPECT_EQ(buf[6], '7');
+  EXPECT_EQ(buf[7], '8');
+  EXPECT_EQ(buf[8], '9');
+  EXPECT_EQ(buf[9], 0);
+}
+
+TEST(Printf, OverflowUint) {
+  char buf[] = "123456789";
+  Snprintf(buf, 4, "a%llx", (long long)0x123456789);  // NOLINT
+  EXPECT_EQ(0, strcmp(buf, "a12"));
+  EXPECT_EQ(buf[3], 0);
+  EXPECT_EQ(buf[4], '5');
+  EXPECT_EQ(buf[5], '6');
+  EXPECT_EQ(buf[6], '7');
+  EXPECT_EQ(buf[7], '8');
+  EXPECT_EQ(buf[8], '9');
+  EXPECT_EQ(buf[9], 0);
+}
+
+TEST(Printf, OverflowPtr) {
+  char buf[] = "123456789";
+  Snprintf(buf, 4, "%p", (void*)0x123456789);
+  EXPECT_EQ(0, strcmp(buf, "0x0"));
+  EXPECT_EQ(buf[3], 0);
+  EXPECT_EQ(buf[4], '5');
+  EXPECT_EQ(buf[5], '6');
+  EXPECT_EQ(buf[6], '7');
+  EXPECT_EQ(buf[7], '8');
+  EXPECT_EQ(buf[8], '9');
+  EXPECT_EQ(buf[9], 0);
+}
+
+template<typename T>
+static void TestMinMax(const char *fmt, T min, T max) {
+  char buf[1024];
+  uptr len = Snprintf(buf, sizeof(buf), fmt, min, max);
+  char buf2[1024];
+  snprintf(buf2, sizeof(buf2), fmt, min, max);  // libc output is the reference.
+  EXPECT_EQ(len, strlen(buf));  // Returned length matches what was written.
+  EXPECT_EQ(0, strcmp(buf, buf2));
+}
+
+TEST(Printf, MinMax) {
+  TestMinMax<int>("%d-%d", INT_MIN, INT_MAX);  // NOLINT
+  TestMinMax<long>("%ld-%ld", LONG_MIN, LONG_MAX);  // NOLINT
+  TestMinMax<long long>("%lld-%lld", LLONG_MIN, LLONG_MAX);  // NOLINT
+  TestMinMax<unsigned>("%u-%u", 0, UINT_MAX);  // NOLINT
+  TestMinMax<unsigned long>("%lu-%lu", 0, ULONG_MAX);  // NOLINT
+  TestMinMax<unsigned long long>("%llu-%llu", 0, ULLONG_MAX);  // NOLINT
+  TestMinMax<unsigned>("%x-%x", 0, UINT_MAX);  // NOLINT
+  TestMinMax<unsigned long>("%lx-%lx", 0, ULONG_MAX);  // NOLINT
+  TestMinMax<unsigned long long>("%llx-%llx", 0, ULLONG_MAX);  // NOLINT
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_shadow_test.cc b/lib/tsan/unit_tests/tsan_shadow_test.cc
new file mode 100644
index 0000000..81c076e
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_shadow_test.cc
@@ -0,0 +1,47 @@
+//===-- tsan_shadow_test.cc -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_platform.h"
+#include "gtest/gtest.h"
+
+namespace __tsan {
+
+TEST(Shadow, Mapping) {
+  static int global;
+  int stack;
+  void *heap = malloc(0);  // NOTE(review): malloc(0) may return NULL.
+  free(heap);  // Only the address is used below, never dereferenced.
+
+  CHECK(IsAppMem((uptr)&global));
+  CHECK(IsAppMem((uptr)&stack));
+  CHECK(IsAppMem((uptr)heap));
+
+  CHECK(IsShadowMem(MemToShadow((uptr)&global)));
+  CHECK(IsShadowMem(MemToShadow((uptr)&stack)));
+  CHECK(IsShadowMem(MemToShadow((uptr)heap)));
+}
+
+TEST(Shadow, Celling) {
+  u64 aligned_data[4];
+  char *data = (char*)aligned_data;
+  CHECK_EQ((uptr)data % kShadowSize, 0);
+  uptr s0 = MemToShadow((uptr)&data[0]);
+  CHECK_EQ(s0 % kShadowSize, 0);
+  for (unsigned i = 1; i < kShadowCell; i++)  // Same cell as data[0].
+    CHECK_EQ(s0, MemToShadow((uptr)&data[i]));
+  for (unsigned i = kShadowCell; i < 2*kShadowCell; i++)  // Second cell.
+    CHECK_EQ(s0 + kShadowSize*kShadowCnt, MemToShadow((uptr)&data[i]));
+  for (unsigned i = 2*kShadowCell; i < 3*kShadowCell; i++)  // Third cell.
+    CHECK_EQ(s0 + 2*kShadowSize*kShadowCnt, MemToShadow((uptr)&data[i]));
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_suppressions_test.cc b/lib/tsan/unit_tests/tsan_suppressions_test.cc
new file mode 100644
index 0000000..dbedeb2
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_suppressions_test.cc
@@ -0,0 +1,132 @@
+//===-- tsan_suppressions_test.cc -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_suppressions.h"
+#include "tsan_rtl.h"
+#include "gtest/gtest.h"
+
+#include <string.h>
+
+namespace __tsan {
+
+TEST(Suppressions, Parse) {
+  ScopedInRtl in_rtl;
+  Suppression *supp0 = SuppressionParse(
+    "race:foo\n"
+    " 	race:bar\n"  // NOLINT
+    "race:baz	 \n"  // NOLINT
+    "# a comment\n"
+    "race:quz\n"
+  );  // NOLINT
+  Suppression *supp = supp0;
+  EXPECT_EQ(supp->type, SuppressionRace);
+  EXPECT_EQ(0, strcmp(supp->func, "quz"));
+  supp = supp->next;
+  EXPECT_EQ(supp->type, SuppressionRace);
+  EXPECT_EQ(0, strcmp(supp->func, "baz"));
+  supp = supp->next;
+  EXPECT_EQ(supp->type, SuppressionRace);
+  EXPECT_EQ(0, strcmp(supp->func, "bar"));
+  supp = supp->next;
+  EXPECT_EQ(supp->type, SuppressionRace);
+  EXPECT_EQ(0, strcmp(supp->func, "foo"));
+  supp = supp->next;
+  EXPECT_EQ((Suppression*)0, supp);
+  SuppressionFree(supp0);
+}
+
+TEST(Suppressions, Parse2) {
+  ScopedInRtl in_rtl;
+  Suppression *supp0 = SuppressionParse(
+    "  	# first line comment\n"  // NOLINT
+    " 	race:bar 	\n"  // NOLINT
+    "race:baz* *baz\n"
+    "# a comment\n"
+    "# last line comment\n"
+  );  // NOLINT
+  Suppression *supp = supp0;
+  EXPECT_EQ(supp->type, SuppressionRace);
+  EXPECT_EQ(0, strcmp(supp->func, "baz* *baz"));
+  supp = supp->next;
+  EXPECT_EQ(supp->type, SuppressionRace);
+  EXPECT_EQ(0, strcmp(supp->func, "bar"));
+  supp = supp->next;
+  EXPECT_EQ((Suppression*)0, supp);
+  SuppressionFree(supp0);
+}
+
+TEST(Suppressions, Parse3) {
+  ScopedInRtl in_rtl;
+  Suppression *supp0 = SuppressionParse(
+    "# last suppression w/o line-feed\n"
+    "race:foo\n"
+    "race:bar"
+  );  // NOLINT
+  Suppression *supp = supp0;
+  EXPECT_EQ(supp->type, SuppressionRace);
+  EXPECT_EQ(0, strcmp(supp->func, "bar"));
+  supp = supp->next;
+  EXPECT_EQ(supp->type, SuppressionRace);
+  EXPECT_EQ(0, strcmp(supp->func, "foo"));
+  supp = supp->next;
+  EXPECT_EQ((Suppression*)0, supp);
+  SuppressionFree(supp0);
+}
+
+TEST(Suppressions, ParseType) {
+  ScopedInRtl in_rtl;
+  Suppression *supp0 = SuppressionParse(
+    "race:foo\n"
+    "thread:bar\n"
+    "mutex:baz\n"
+    "signal:quz\n"
+  );  // NOLINT
+  Suppression *supp = supp0;  // Expected in reverse input order.
+  EXPECT_EQ(supp->type, SuppressionSignal);
+  EXPECT_EQ(0, strcmp(supp->func, "quz"));
+  supp = supp->next;
+  EXPECT_EQ(supp->type, SuppressionMutex);
+  EXPECT_EQ(0, strcmp(supp->func, "baz"));
+  supp = supp->next;
+  EXPECT_EQ(supp->type, SuppressionThread);
+  EXPECT_EQ(0, strcmp(supp->func, "bar"));
+  supp = supp->next;
+  EXPECT_EQ(supp->type, SuppressionRace);
+  EXPECT_EQ(0, strcmp(supp->func, "foo"));
+  supp = supp->next;
+  EXPECT_EQ((Suppression*)0, supp);  // Exactly four entries.
+  SuppressionFree(supp0);  // Frees from the saved head pointer.
+}
+
+static bool MyMatch(const char *templ, const char *func) {
+  char tmp[1024];  // Mutable copy (SuppressionMatch presumably wants char*).
+  strcpy(tmp, templ);  // NOLINT
+  return SuppressionMatch(tmp, func);
+}
+
+TEST(Suppressions, Match) {
+  EXPECT_TRUE(MyMatch("foobar", "foobar"));
+  EXPECT_TRUE(MyMatch("foobar", "prefix_foobar_postfix"));
+  EXPECT_TRUE(MyMatch("*foobar*", "prefix_foobar_postfix"));
+  EXPECT_TRUE(MyMatch("foo*bar", "foo_middle_bar"));
+  EXPECT_TRUE(MyMatch("foo*bar", "foobar"));
+  EXPECT_TRUE(MyMatch("foo*bar*baz", "foo_middle_bar_another_baz"));
+  EXPECT_TRUE(MyMatch("foo*bar*baz", "foo_middle_barbaz"));
+
+  EXPECT_FALSE(MyMatch("foo", "baz"));
+  EXPECT_FALSE(MyMatch("foobarbaz", "foobar"));
+  EXPECT_FALSE(MyMatch("foobarbaz", "barbaz"));
+  EXPECT_FALSE(MyMatch("foo*bar", "foobaz"));
+  EXPECT_FALSE(MyMatch("foo*bar", "foo_baz"));
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_sync_test.cc b/lib/tsan/unit_tests/tsan_sync_test.cc
new file mode 100644
index 0000000..58726bf
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_sync_test.cc
@@ -0,0 +1,65 @@
+//===-- tsan_sync_test.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_sync.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "gtest/gtest.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <map>
+
+namespace __tsan {
+
+TEST(Sync, Table) {
+  const uintptr_t kIters = 512*1024;
+  const uintptr_t kRange = 10000;
+
+  ScopedInRtl in_rtl;
+  ThreadState *thr = cur_thread();
+  uptr pc = 0;
+
+  SyncTab tab;
+  SyncVar *golden[kRange] = {};
+  unsigned seed = 0;
+  for (uintptr_t i = 0; i < kIters; i++) {
+    uintptr_t addr = rand_r(&seed) % (kRange - 1) + 1;
+    if (rand_r(&seed) % 2) {
+      // Get or add: the entry must match what golden[] recorded, if anything.
+      SyncVar *v = tab.GetAndLock(thr, pc, addr, true);
+      EXPECT_TRUE(golden[addr] == 0 || golden[addr] == v);
+      EXPECT_EQ(v->addr, addr);
+      golden[addr] = v;
+      v->mtx.Unlock();
+    } else {
+      // Remove: the table must hand back exactly what golden[] recorded.
+      SyncVar *v = tab.GetAndRemove(thr, pc, addr);
+      EXPECT_EQ(golden[addr], v);
+      if (v) {
+        EXPECT_EQ(v->addr, addr);
+        golden[addr] = 0;
+        DestroyAndFree(v);
+      }
+    }
+  }
+  for (uintptr_t addr = 0; addr < kRange; addr++) {
+    if (golden[addr] == 0)
+      continue;
+    SyncVar *v = tab.GetAndRemove(thr, pc, addr);
+    ASSERT_EQ(v, golden[addr]);  // Fatal: v is dereferenced below.
+    EXPECT_EQ(v->addr, addr);
+    DestroyAndFree(v);
+  }
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/unit_tests/tsan_vector_test.cc b/lib/tsan/unit_tests/tsan_vector_test.cc
new file mode 100644
index 0000000..96909eb
--- /dev/null
+++ b/lib/tsan/unit_tests/tsan_vector_test.cc
@@ -0,0 +1,45 @@
+//===-- tsan_vector_test.cc -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_vector.h"
+#include "tsan_rtl.h"
+#include "gtest/gtest.h"
+
+namespace __tsan {
+
+TEST(Vector, Basic) {
+  ScopedInRtl in_rtl;
+  Vector<int> v(MBlockScopedBuf);
+  EXPECT_EQ(v.Size(), (uptr)0);
+  v.PushBack(42);
+  EXPECT_EQ(v.Size(), (uptr)1);
+  EXPECT_EQ(v[0], 42);
+  v.PushBack(43);
+  EXPECT_EQ(v.Size(), (uptr)2);
+  EXPECT_EQ(v[0], 42);
+  EXPECT_EQ(v[1], 43);
+}
+
+TEST(Vector, Stride) {
+  ScopedInRtl in_rtl;
+  Vector<int> v(MBlockScopedBuf);
+  for (int i = 0; i < 1000; i++) {
+    v.PushBack(i);
+    EXPECT_EQ(v.Size(), (uptr)(i + 1));
+    EXPECT_EQ(v[i], i);
+  }
+  for (int i = 0; i < 1000; i++) {
+    EXPECT_EQ(v[i], i);
+  }
+}
+
+}  // namespace __tsan
diff --git a/lib/ucmpdi2.c b/lib/ucmpdi2.c
index f2d3f99..3242bbf 100644
--- a/lib/ucmpdi2.c
+++ b/lib/ucmpdi2.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/udivdi3.c b/lib/udivdi3.c
index bbd551a..6c0303d 100644
--- a/lib/udivdi3.c
+++ b/lib/udivdi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/udivmoddi4.c b/lib/udivmoddi4.c
index c5db21c..57282d5 100644
--- a/lib/udivmoddi4.c
+++ b/lib/udivmoddi4.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
@@ -21,8 +20,6 @@
 
 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
 
-ARM_EABI_FNALIAS(uldivmod, udivmoddi4);
-
 COMPILER_RT_ABI du_int
 __udivmoddi4(du_int a, du_int b, du_int* rem)
 {
@@ -133,7 +130,7 @@
                     *rem = n.s.low & (d.s.low - 1);
                 if (d.s.low == 1)
                     return n.all;
-                unsigned sr = __builtin_ctz(d.s.low);
+                sr = __builtin_ctz(d.s.low);
                 q.s.high = n.s.high >> sr;
                 q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
                 return q.all;
diff --git a/lib/udivmodsi4.c b/lib/udivmodsi4.c
index 2a3ee27..5b49089 100644
--- a/lib/udivmodsi4.c
+++ b/lib/udivmodsi4.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/udivmodti4.c b/lib/udivmodti4.c
index d1e19ed..427861b 100644
--- a/lib/udivmodti4.c
+++ b/lib/udivmodti4.c
@@ -132,7 +132,7 @@
                     *rem = n.s.low & (d.s.low - 1);
                 if (d.s.low == 1)
                     return n.all;
-                unsigned sr = __builtin_ctzll(d.s.low);
+                sr = __builtin_ctzll(d.s.low);
                 q.s.high = n.s.high >> sr;
                 q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
                 return q.all;
diff --git a/lib/udivsi3.c b/lib/udivsi3.c
index 721ae89..39ef48b 100644
--- a/lib/udivsi3.c
+++ b/lib/udivsi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/umoddi3.c b/lib/umoddi3.c
index 9de1a64..3541ab6 100644
--- a/lib/umoddi3.c
+++ b/lib/umoddi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/umodsi3.c b/lib/umodsi3.c
index 569b7fc..aae741d 100644
--- a/lib/umodsi3.c
+++ b/lib/umodsi3.c
@@ -11,7 +11,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include "abi.h"
 
 #include "int_lib.h"
 
diff --git a/lib/x86_64/CMakeLists.txt b/lib/x86_64/CMakeLists.txt
deleted file mode 100644
index ee21308..0000000
--- a/lib/x86_64/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-SET( SRCS
- floatdixf.c
- floatdisf.c
- floatdidf.c
- )
diff --git a/lib/x86_64/Makefile.mk b/lib/x86_64/Makefile.mk
index 09037b9..ee3f9ce 100644
--- a/lib/x86_64/Makefile.mk
+++ b/lib/x86_64/Makefile.mk
@@ -7,6 +7,7 @@
 #
 #===------------------------------------------------------------------------===#
 
+ModuleName := builtins
 SubDirs := 
 OnlyArchs := x86_64
 
diff --git a/lib/x86_64/floatdidf.c b/lib/x86_64/floatdidf.c
index cce3cd5..388404e 100644
--- a/lib/x86_64/floatdidf.c
+++ b/lib/x86_64/floatdidf.c
@@ -6,7 +6,7 @@
 
 #ifdef __x86_64__
 
-#include <stdint.h>
+#include "../int_lib.h"
 
 double __floatdidf(int64_t a)
 {
diff --git a/lib/x86_64/floatdisf.c b/lib/x86_64/floatdisf.c
index 753ba90..96c3728 100644
--- a/lib/x86_64/floatdisf.c
+++ b/lib/x86_64/floatdisf.c
@@ -4,7 +4,7 @@
 
 #ifdef __x86_64__
 
-#include <stdint.h>
+#include "../int_lib.h"
 
 float __floatdisf(int64_t a)
 {
diff --git a/lib/x86_64/floatdixf.c b/lib/x86_64/floatdixf.c
index 569f727..c01193a 100644
--- a/lib/x86_64/floatdixf.c
+++ b/lib/x86_64/floatdixf.c
@@ -6,7 +6,7 @@
 
 #ifdef __x86_64__
 
-#include <stdint.h>
+#include "../int_lib.h"
 
 long double __floatdixf(int64_t a)
 {
diff --git a/make/AppleBI.mk b/make/AppleBI.mk
index 0e3473d..96f8222 100644
--- a/make/AppleBI.mk
+++ b/make/AppleBI.mk
@@ -14,17 +14,12 @@
 
 ifeq (,$(SDKROOT))
 	INSTALL_TARGET = install-MacOSX
-    LD_OTHER_FLAGS =
 else
 	INSTALL_TARGET = install-iOS
-	CFLAGS.Release.armv6 := $(CFLAGS) -Wall -Os -fomit-frame-pointer -g -isysroot $(SDKROOT)
-	CFLAGS.Release.armv7 := $(CFLAGS) -Wall -Os -fomit-frame-pointer -g -isysroot $(SDKROOT)
-	CFLAGS.Static.armv6  := $(CFLAGS) -Wall -Os -fomit-frame-pointer -g -isysroot $(SDKROOT) -static 
-	CFLAGS.Static.armv7  := $(CFLAGS) -Wall -Os -fomit-frame-pointer -g -isysroot $(SDKROOT) -static 
-    LD_OTHER_FLAGS = -Wl,-alias_list,$(SRCROOT)/lib/arm/softfloat-alias.list -isysroot $(SDKROOT)
 endif
 
 
+
 # Log full compile lines in B&I logs and omit summary lines.
 Verb :=
 Summary := @true
@@ -49,7 +44,7 @@
 	cp $(SYMROOT)/libcompiler_rt-dyld.a  \
 				    $(DSTROOT)/usr/local/lib/dyld/libcompiler_rt.a
 	mkdir -p $(DSTROOT)/usr/lib/system
-	strip -S $(SYMROOT)/libcompiler_rt.dylib \
+	$(call GetCNAVar,STRIP,Platform.darwin_bni,Release,) -S $(SYMROOT)/libcompiler_rt.dylib \
 	    -o $(DSTROOT)/usr/lib/system/libcompiler_rt.dylib
 	cd $(DSTROOT)/usr/lib/system; \
 	    ln -s libcompiler_rt.dylib libcompiler_rt_profile.dylib; \
@@ -58,16 +53,17 @@
 # Rule to make each dylib slice
 $(OBJROOT)/libcompiler_rt-%.dylib : $(OBJROOT)/darwin_bni/Release/%/libcompiler_rt.a
 	echo "const char vers[] = \"@(#) $(RC_ProjectName)-$(RC_ProjectSourceVersion)\"; " > $(OBJROOT)/version.c
-	$(CC.Release) $(OBJROOT)/version.c -arch $* -dynamiclib \
+	$(call GetCNAVar,CC,Platform.darwin_bni,Release,$*) \
+	   $(OBJROOT)/version.c -arch $* -dynamiclib \
 	   -install_name /usr/lib/system/libcompiler_rt.dylib \
 	   -compatibility_version 1 -current_version $(RC_ProjectSourceVersion) \
 	   -nodefaultlibs -lSystem -umbrella System -dead_strip \
-	   $(LD_OTHER_FLAGS) -Wl,-force_load,$^ -o $@ 
+	   $(DYLIB_FLAGS) -Wl,-force_load,$^ -o $@ 
 
 # Rule to make fat dylib
-$(SYMROOT)/libcompiler_rt.dylib: $(foreach arch,$(RC_ARCHS), \
+$(SYMROOT)/libcompiler_rt.dylib: $(foreach arch,$(filter-out armv4t,$(RC_ARCHS)), \
                                         $(OBJROOT)/libcompiler_rt-$(arch).dylib)
-	lipo -create $^ -o  $@
+	$(call GetCNAVar,LIPO,Platform.darwin_bni,Release,) -create $^ -o  $@
 
 
 
@@ -83,25 +79,27 @@
 	cp $(SYMROOT)/libcompiler_rt-dyld.a  \
 				    $(DSTROOT)/usr/local/lib/dyld/libcompiler_rt.a
 	mkdir -p $(DSTROOT)/usr/lib/system
-	strip -S $(SYMROOT)/libcompiler_rt.dylib \
+	$(call GetCNAVar,STRIP,Platform.darwin_bni,Release,) -S $(SYMROOT)/libcompiler_rt.dylib \
 	    -o $(DSTROOT)/usr/lib/system/libcompiler_rt.dylib
 
 	
 # Rule to make fat archive
 $(SYMROOT)/libcompiler_rt-static.a : $(foreach arch,$(RC_ARCHS), \
                          $(OBJROOT)/darwin_bni/Static/$(arch)/libcompiler_rt.a)
-	lipo -create $^ -o  $@
+	$(call GetCNAVar,LIPO,Platform.darwin_bni,Release,) -create $^ -o  $@
 
-# rule to make each archive slice for dyld
+# rule to make each archive slice for dyld (which removes a few archive members)
 $(OBJROOT)/libcompiler_rt-dyld-%.a : $(OBJROOT)/darwin_bni/Release/%/libcompiler_rt.a
 	cp $^ $@
-	ar -d $@ apple_versioning.o
-	ar -d $@ gcc_personality_v0.o
-	ar -d $@ eprintf.o
-	ranlib $@
+	DEL_LIST=`$(call GetCNAVar,AR,Platform.darwin_bni,Release,) -t $@ | egrep 'apple_versioning|gcc_personality_v0|eprintf' | xargs echo` ; \
+	if [ -n "$${DEL_LIST}" ] ; \
+	then  \
+		$(call GetCNAVar,AR,Platform.darwin_bni,Release,) -d $@ $${DEL_LIST}; \
+		$(call GetCNAVar,RANLIB,Platform.darwin_bni,Release,) $@ ; \
+	fi
 
 # rule to make make archive for dyld
 $(SYMROOT)/libcompiler_rt-dyld.a : $(foreach arch,$(RC_ARCHS), \
                          $(OBJROOT)/libcompiler_rt-dyld-$(arch).a)
-	lipo -create $^ -o  $@
+	$(call GetCNAVar,LIPO,Platform.darwin_bni,Release,) -create $^ -o  $@
 
diff --git a/make/config.mk b/make/config.mk
index d96b1b4..a826173 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -8,6 +8,11 @@
 ProjSrcRoot := $(shell pwd)
 ProjObjRoot := $(ProjSrcRoot)
 
+# The list of modules which are required to be built into every library. This
+# should only be used for internal utilities which could be used in any other
+# module. In all other cases the platform should be allowed to opt in.
+AlwaysRequiredModules := int_util
+
 ###
 # Tool configuration variables.
 
@@ -34,3 +39,8 @@
 ifndef Summary
   Summary = $(Echo)
 endif
+
+###
+# Common compiler options
+COMMON_CXXFLAGS=-fno-exceptions -fPIC -funwind-tables
+COMMON_CFLAGS=-fPIC
diff --git a/make/lib_info.mk b/make/lib_info.mk
index 72bc4a1..2e85f64 100644
--- a/make/lib_info.mk
+++ b/make/lib_info.mk
@@ -14,6 +14,14 @@
 #   AvailableIn.<function> - The list of subdir keys where 'function' is
 #                            defined.
 
+# Determine the set of available modules.
+AvailableModules := $(sort $(foreach key,$(SubDirKeys),\
+	$($(key).ModuleName)))
+
+# Build a per-module map of subdir keys.
+$(foreach key,$(SubDirKeys),\
+	$(call Append,ModuleSubDirKeys.$($(key).ModuleName),$(key)))
+
 AvailableArchs := $(sort $(foreach key,$(SubDirKeys),\
 	$($(key).OnlyArchs)))
 
@@ -21,12 +29,12 @@
 	$(basename $($(key).ObjNames))))
 
 CommonFunctions := $(sort\
-  $(foreach key,$(SubDirKeys),\
+  $(foreach key,$(ModuleSubDirKeys.builtins),\
     $(if $(call strneq,,$(strip $($(key).OnlyArchs) $($(key).OnlyConfigs))),,\
          $(basename $($(key).ObjNames)))))
 
 # Compute common arch functions.
-$(foreach key,$(SubDirKeys),\
+$(foreach key,$(ModuleSubDirKeys.builtins),\
   $(if $(call strneq,,$($(key).OnlyConfigs)),,\
     $(foreach arch,$($(key).OnlyArchs),\
       $(call Append,ArchFunctions.$(arch),$(sort \
@@ -48,4 +56,5 @@
                     CC CFLAGS FUNCTIONS OPTIMIZED \
                     RANLIB RANLIBFLAGS \
                     VISIBILITY_HIDDEN \
-                    KERNEL_USE
+                    KERNEL_USE \
+                    STRIP LIPO
diff --git a/make/options.mk b/make/options.mk
index f6a331b..f695fc8 100644
--- a/make/options.mk
+++ b/make/options.mk
@@ -31,3 +31,6 @@
 RANLIB := ranlib
 # FIXME: Remove these pipes once ranlib errors are fixed.
 RANLIBFLAGS := 2> /dev/null
+
+STRIP := strip
+LIPO := lipo
diff --git a/make/platform/clang_darwin.mk b/make/platform/clang_darwin.mk
index 6eb10c8..2ca5fbc 100644
--- a/make/platform/clang_darwin.mk
+++ b/make/platform/clang_darwin.mk
@@ -12,14 +12,23 @@
   $(shell \
     result=""; \
     for arch in $(1); do \
-      if $(CC) -arch $$arch -dumpversion > /dev/null; then \
+      if $(CC) -arch $$arch -c \
+	  -integrated-as \
+	  $(ProjSrcRoot)/make/platform/clang_darwin_test_input.c \
+	  -isysroot $(ProjSrcRoot)/SDKs/darwin \
+	  -o /dev/null > /dev/null 2> /dev/null; then \
         result="$$result$$arch "; \
+      else \
+	printf 1>&2 \
+	  "warning: clang_darwin.mk: dropping arch '$$arch' from lib '$(2)'\n"; \
       fi; \
     done; \
     echo $$result)
 
 ###
 
+CC := clang
+
 Configs :=
 UniversalArchs :=
 
@@ -27,28 +36,54 @@
 # still be referenced from Darwin system headers. This symbol is only ever
 # needed on i386.
 Configs += eprintf
-UniversalArchs.eprintf := $(call CheckArches,i386)
+UniversalArchs.eprintf := $(call CheckArches,i386,eprintf)
 
 # Configuration for targetting 10.4. We need a few functions missing from
 # libgcc_s.10.4.dylib. We only build x86 slices since clang doesn't really
 # support targetting PowerPC.
 Configs += 10.4
-UniversalArchs.10.4 := $(call CheckArches,i386 x86_64)
+UniversalArchs.10.4 := $(call CheckArches,i386 x86_64,10.4)
 
 # Configuration for targetting iOS, for some ARMv6 functions, which must be
 # in the same linkage unit, and for a couple of other functions that didn't
 # make it into libSystem.
 Configs += ios
-UniversalArchs.ios := $(call CheckArches,i386 x86_64 armv6 armv7)
+UniversalArchs.ios := $(call CheckArches,i386 x86_64 armv6 armv7,ios)
+
+# Configuration for targeting OSX. These functions may not be in libSystem
+# so we should provide our own.
+Configs += osx
+UniversalArchs.osx := $(call CheckArches,i386 x86_64,osx)
 
 # Configuration for use with kernel/kexts.
 Configs += cc_kext
-UniversalArchs.cc_kext := $(call CheckArches,armv6 armv7 i386 x86_64)
+UniversalArchs.cc_kext := $(call CheckArches,armv6 armv7 i386 x86_64,cc_kext)
+
+# Configurations which define the profiling support functions.
+Configs += profile_osx
+UniversalArchs.profile_osx := $(call CheckArches,i386 x86_64,profile_osx)
+Configs += profile_ios
+UniversalArchs.profile_ios := $(call CheckArches,i386 x86_64 armv6 armv7,profile_ios)
+
+# Configurations which define the ASAN support functions.
+Configs += asan_osx
+UniversalArchs.asan_osx := $(call CheckArches,i386 x86_64,asan_osx)
+
+# If RC_SUPPORTED_ARCHS is defined, treat it as a list of the architectures we
+# are intended to support and limit what we try to build to that.
+#
+# We make sure to remove empty configs if we end up dropping all the requested
+# archs for a particular config.
+ifneq ($(RC_SUPPORTED_ARCHS),)
+$(foreach config,$(Configs),\
+  $(call Set,UniversalArchs.$(config),\
+	$(filter $(RC_SUPPORTED_ARCHS),$(UniversalArchs.$(config))))\
+  $(if $(UniversalArchs.$(config)),,\
+	$(call Set,Configs,$(filter-out $(config),$(Configs)))))
+endif
 
 ###
 
-CC := gcc
-
 # Forcibly strip off any -arch, as that totally breaks our universal support.
 override CC := $(subst -arch ,-arch_,$(CC))
 override CC := $(patsubst -arch_%,,$(CC))
@@ -59,29 +94,41 @@
 # never depend on the environmental overrides. We simply set them to minimum
 # supported deployment target -- nothing in the compiler-rt libraries should
 # actually depend on the deployment target.
-X86_DEPLOYMENT_ARGS := -mmacosx-version-min=10.4
-ARM_DEPLOYMENT_ARGS := -miphoneos-version-min=1.0
+OSX_DEPLOYMENT_ARGS := -mmacosx-version-min=10.4
+IOS_DEPLOYMENT_ARGS := -miphoneos-version-min=1.0
+IOSSIM_DEPLOYMENT_ARGS := -miphoneos-version-min=1.0
 
-# If an explicit ARM_SDK build variable is set, use that as the isysroot.
-ifneq ($(ARM_SDK),)
-ARM_DEPLOYMENT_ARGS += -isysroot $(ARM_SDK)
-endif
+# Use our stub SDK as the sysroot to support more portable building.
+OSX_DEPLOYMENT_ARGS += -isysroot $(ProjSrcRoot)/SDKs/darwin
+IOS_DEPLOYMENT_ARGS += -isysroot $(ProjSrcRoot)/SDKs/darwin
+IOSSIM_DEPLOYMENT_ARGS += -isysroot $(ProjSrcRoot)/SDKs/darwin
 
-CFLAGS.eprintf		:= $(CFLAGS) $(X86_DEPLOYMENT_ARGS)
-CFLAGS.10.4		:= $(CFLAGS) $(X86_DEPLOYMENT_ARGS)
-CFLAGS.ios.i386		:= $(CFLAGS) $(X86_DEPLOYMENT_ARGS)
-CFLAGS.ios.x86_64	:= $(CFLAGS) $(X86_DEPLOYMENT_ARGS)
-CFLAGS.ios.armv6	:= $(CFLAGS) $(ARM_DEPLOYMENT_ARGS)
-CFLAGS.ios.armv7	:= $(CFLAGS) $(ARM_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext.i386	:= $(CFLAGS) $(X86_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext.x86_64	:= $(CFLAGS) $(X86_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext.armv6	:= $(CFLAGS) $(ARM_DEPLOYMENT_ARGS) -mthumb
-CFLAGS.cc_kext.armv7	:= $(CFLAGS) $(ARM_DEPLOYMENT_ARGS) -mthumb
+CFLAGS.eprintf		:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
+CFLAGS.10.4		:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
+# FIXME: We can't build ASAN with our stub SDK yet.
+CFLAGS.asan_osx         := $(CFLAGS) -mmacosx-version-min=10.5
+
+CFLAGS.ios.i386		:= $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
+CFLAGS.ios.x86_64	:= $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
+CFLAGS.ios.armv6	:= $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
+CFLAGS.ios.armv7	:= $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
+CFLAGS.osx.i386		:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
+CFLAGS.osx.x86_64	:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
+CFLAGS.cc_kext.i386	:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
+CFLAGS.cc_kext.x86_64	:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
+CFLAGS.cc_kext.armv6	:= $(CFLAGS) $(IOS_DEPLOYMENT_ARGS) -mthumb
+CFLAGS.cc_kext.armv7	:= $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
+CFLAGS.profile_osx.i386   := $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
+CFLAGS.profile_osx.x86_64 := $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
+CFLAGS.profile_ios.i386   := $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
+CFLAGS.profile_ios.x86_64 := $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
+CFLAGS.profile_ios.armv6  := $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
+CFLAGS.profile_ios.armv7  := $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
 
 FUNCTIONS.eprintf := eprintf
 FUNCTIONS.10.4 := eprintf floatundidf floatundisf floatundixf
 
-FUNCTIONS.ios	    := divmodsi4 udivmodsi4
+FUNCTIONS.ios	    := divmodsi4 udivmodsi4 mulosi4 mulodi4 muloti4
 # On x86, the divmod functions reference divsi.
 FUNCTIONS.ios.i386   := $(FUNCTIONS.ios) \
                         divsi3 udivsi3
@@ -92,6 +139,13 @@
                        switch16 switch32 switch8 switchu8 \
                        save_vfp_d8_d15_regs restore_vfp_d8_d15_regs
 
+FUNCTIONS.osx	:= mulosi4 mulodi4 muloti4
+
+FUNCTIONS.profile_osx := GCDAProfiling
+FUNCTIONS.profile_ios := GCDAProfiling
+
+FUNCTIONS.asan_osx := $(AsanFunctions)
+
 CCKEXT_COMMON_FUNCTIONS := \
 	absvdi2 \
 	absvsi2 \
@@ -203,8 +257,42 @@
 	unorddf2 \
 	unordsf2
 
-FUNCTIONS.cc_kext.armv6 := $(CCKEXT_ARM_FUNCTIONS)
-FUNCTIONS.cc_kext.armv7 := $(CCKEXT_ARM_FUNCTIONS)
+CCKEXT_ARMVFP_FUNCTIONS := $(CCKEXT_ARM_FUNCTIONS) \
+	adddf3vfp \
+	addsf3vfp \
+	divdf3vfp \
+	divsf3vfp \
+	eqdf2vfp \
+	eqsf2vfp \
+	extendsfdf2vfp \
+	fixdfsivfp \
+	fixsfsivfp \
+	fixunsdfsivfp \
+	fixunssfsivfp \
+	floatsidfvfp \
+	floatsisfvfp \
+	floatunssidfvfp \
+	floatunssisfvfp \
+	gedf2vfp \
+	gesf2vfp \
+	gtdf2vfp \
+	gtsf2vfp \
+	ledf2vfp \
+	lesf2vfp \
+	ltdf2vfp \
+	ltsf2vfp \
+	muldf3vfp \
+	mulsf3vfp \
+	nedf2vfp \
+	nesf2vfp \
+	subdf3vfp \
+	subsf3vfp \
+	truncdfsf2vfp \
+	unorddf2vfp \
+	unordsf2vfp
+
+FUNCTIONS.cc_kext.armv6 := $(CCKEXT_ARMVFP_FUNCTIONS)
+FUNCTIONS.cc_kext.armv7 := $(CCKEXT_ARMVFP_FUNCTIONS)
 
 CCKEXT_X86_FUNCTIONS := $(CCKEXT_COMMON_FUNCTIONS) \
 	divxc3 \
diff --git a/make/platform/clang_darwin_test_input.c b/make/platform/clang_darwin_test_input.c
new file mode 100644
index 0000000..b7074b8
--- /dev/null
+++ b/make/platform/clang_darwin_test_input.c
@@ -0,0 +1,6 @@
+/* Include the headers we use in int_lib.h, to verify that they work. */
+
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
diff --git a/make/platform/clang_linux.mk b/make/platform/clang_linux.mk
new file mode 100644
index 0000000..b3a7759
--- /dev/null
+++ b/make/platform/clang_linux.mk
@@ -0,0 +1,82 @@
+Description := Static runtime libraries for clang/Linux.
+
+###
+
+CC := clang
+Arch := unknown
+Configs :=
+
+# We don't currently have any general purpose way to target architectures other
+# than the compiler defaults (because there is no generalized way to invoke
+# cross compilers). For now, we just find the target architecture of the compiler
+# and only define configurations we know that compiler can generate.
+CompilerTargetTriple := $(shell \
+	$(CC) -v 2>&1 | grep 'Target:' | cut -d' ' -f2)
+ifneq ($(DEBUGMAKE),)
+ifeq ($(CompilerTargetTriple),)
+$(error "unable to infer compiler target triple for $(CC)")
+endif
+endif
+
+CompilerTargetArch := $(firstword $(subst -, ,$(CompilerTargetTriple)))
+
+# Only define configs if we detected a linux target.
+ifneq ($(findstring -linux-,$(CompilerTargetTriple)),)
+
+# Configurations which just include all the runtime functions.
+ifeq ($(call contains,i386 x86_64,$(CompilerTargetArch)),true)
+Configs += full-i386 full-x86_64
+Arch.full-i386 := i386
+Arch.full-x86_64 := x86_64
+endif
+
+# Configuration for profile runtime.
+ifeq ($(call contains,i386 x86_64,$(CompilerTargetArch)),true)
+Configs += profile-i386 profile-x86_64
+Arch.profile-i386 := i386
+Arch.profile-x86_64 := x86_64
+endif
+
+# Configuration for ASAN runtime.
+ifeq ($(CompilerTargetArch),i386)
+Configs += asan-i386
+Arch.asan-i386 := i386
+endif
+ifeq ($(CompilerTargetArch),x86_64)
+Configs += asan-x86_64
+Arch.asan-x86_64 := x86_64
+endif
+
+endif
+
+###
+
+CFLAGS := -Wall -Werror -O3 -fomit-frame-pointer
+
+CFLAGS.full-i386 := $(CFLAGS) -m32
+CFLAGS.full-x86_64 := $(CFLAGS) -m64
+CFLAGS.profile-i386 := $(CFLAGS) -m32
+CFLAGS.profile-x86_64 := $(CFLAGS) -m64
+CFLAGS.asan-i386 := $(CFLAGS) -m32
+CFLAGS.asan-x86_64 := $(CFLAGS) -m64
+
+# Use our stub SDK as the sysroot to support more portable building. For now we
+# just do this for the non-ASAN modules, because the stub SDK doesn't have
+# enough support to build ASAN.
+CFLAGS.full-i386 += --sysroot=$(ProjSrcRoot)/SDKs/linux
+CFLAGS.full-x86_64 += --sysroot=$(ProjSrcRoot)/SDKs/linux
+CFLAGS.profile-i386 += --sysroot=$(ProjSrcRoot)/SDKs/linux
+CFLAGS.profile-x86_64 += --sysroot=$(ProjSrcRoot)/SDKs/linux
+
+FUNCTIONS.full-i386 := $(CommonFunctions) $(ArchFunctions.i386)
+FUNCTIONS.full-x86_64 := $(CommonFunctions) $(ArchFunctions.x86_64)
+FUNCTIONS.profile-i386 := GCDAProfiling
+FUNCTIONS.profile-x86_64 := GCDAProfiling
+FUNCTIONS.asan-i386 := $(AsanFunctions)
+FUNCTIONS.asan-x86_64 := $(AsanFunctions)
+
+# Always use optimized variants.
+OPTIMIZED := 1
+
+# We don't need to use visibility hidden on Linux.
+VISIBILITY_HIDDEN := 0
diff --git a/make/platform/darwin_bni.mk b/make/platform/darwin_bni.mk
index 14a1c19..477e072 100644
--- a/make/platform/darwin_bni.mk
+++ b/make/platform/darwin_bni.mk
@@ -1,24 +1,37 @@
 
 Description := Target for Darwin using an Apple-style build.
 
-Configs := Debug Release Profile Static 
+Configs := Debug Release Profile Static
 
 # We override this with RC_ARCHS because B&I may want to build on an ARCH we
 # haven't explicitly defined support for. If all goes well, this will just work
 # and the resulting lib will just have generic versions for anything unknown.
 UniversalArchs := $(RC_ARCHS)
 
-ifeq (,$(SDKROOT))
-	CC.Release := $(CC)
-	CC.Static  := $(CC)
-else
-	CC.Release := /Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/cc
-	CC.Static  := /Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/cc
+ifneq (,$(SDKROOT))
+	override CC := $(shell xcrun -sdk $(SDKROOT) -find clang)
+	AR := $(shell xcrun -sdk $(SDKROOT) -find ar)
+	RANLIB := $(shell xcrun -sdk $(SDKROOT) -find ranlib)
+	STRIP := $(shell xcrun -sdk $(SDKROOT) -find strip)
+	LIPO := $(shell xcrun -sdk $(SDKROOT) -find lipo)
 endif
 
+ifneq ($(IPHONEOS_DEPLOYMENT_TARGET),)
+	DEPLOYMENT_FLAGS := -miphoneos-version-min=$(IPHONEOS_DEPLOYMENT_TARGET) 
+else
+	ifneq ($(MACOSX_DEPLOYMENT_TARGET),)
+		DEPLOYMENT_FLAGS := -mmacosx-version-min=$(MACOSX_DEPLOYMENT_TARGET) 
+	endif
+endif
 
-CFLAGS := -Wall -Os -fomit-frame-pointer -g
-CFLAGS.Static := $(CFLAGS) -static 
+ifneq (,$(SDKROOT))
+	DEPLOYMENT_FLAGS += -isysroot $(SDKROOT)
+endif
+
+CFLAGS := -Wall -Os -fomit-frame-pointer -g $(DEPLOYMENT_FLAGS)
+CFLAGS.Static := $(CFLAGS) -static  
+DYLIB_FLAGS := $(DEPLOYMENT_FLAGS) \
+		-Xarch_arm -Wl,-alias_list,$(SRCROOT)/lib/arm/softfloat-alias.list
 
 VISIBILITY_HIDDEN := 0
 VISIBILITY_HIDDEN.Static  := 1
@@ -29,8 +42,8 @@
              divdc3 divdi3 divsc3 ffsdi2 \
              fixdfdi fixsfdi fixunsdfdi fixunsdfsi fixunssfdi \
              fixunssfsi floatdidf floatdisf floatundidf floatundisf \
-             gcc_personality_v0 lshrdi3 moddi3 muldc3 muldi3 \
-             mulsc3 mulvdi3 mulvsi3 negdi2 negvdi2 negvsi2 \
+             gcc_personality_v0 lshrdi3 moddi3 muldc3 muldi3 mulosi4 \
+             mulodi4 muloti4 mulsc3 mulvdi3 mulvsi3 negdi2 negvdi2 negvsi2 \
              paritydi2 paritysi2 popcountdi2 popcountsi2 powidf2 \
              powisf2 subvdi3 subvsi3 ucmpdi2 udivdi3 \
              udivmoddi4 umoddi3 apple_versioning eprintf
@@ -51,9 +64,12 @@
                 fixunsxfti fixxfdi fixxfti floatdixf floattidf \
                 floattisf floattixf floatundixf floatuntidf \
                 floatuntisf floatuntixf lshrti3 modti3 multi3 \
-                mulvti3 mulxc3 negti2 negvti2 parityti2 \
+                muloti4 mulvti3 mulxc3 negti2 negvti2 parityti2 \
                 popcountti2 powixf2 subvti3 ucmpti2 udivmodti4 \
                 udivti3 umodti3 clear_cache enable_execute_stack
+
+FUNCTIONS.armv4t := $(FUNCTIONS) 
+
 FUNCTIONS.armv5 := $(FUNCTIONS) \
                 adddf3 addsf3 bswapdi2 bswapsi2  \
                 comparedf2 comparesf2 extendsfdf2 \
@@ -65,7 +81,7 @@
                 truncdfsf2  \
                 modsi3 umodsi3 udivsi3 divsi3 udivmodsi4 divmodsi4 \
                 switch8 switchu8 switch16 switch32 \
-                sync_synchronize 
+                sync_synchronize
 
 FUNCTIONS.armv6 := $(FUNCTIONS) \
 				comparedf2 comparesf2 \
@@ -81,7 +97,7 @@
                 modsi3 umodsi3 udivsi3 divsi3 udivmodsi4 divmodsi4 \
                 switch8 switchu8 switch16 switch32 \
                 restore_vfp_d8_d15_regs save_vfp_d8_d15_regs \
-                sync_synchronize 
+                sync_synchronize
 
 FUNCTIONS.armv7 := $(FUNCTIONS) \
 				comparedf2 comparesf2 \
@@ -95,4 +111,3 @@
                 nedf2vfp nesf2vfp \
                 subdf3vfp subsf3vfp truncdfsf2vfp unorddf2vfp unordsf2vfp \
                 modsi3 umodsi3 udivsi3 divsi3 udivmodsi4 divmodsi4
-
diff --git a/make/platform/darwin_fat.mk b/make/platform/darwin_fat.mk
index 30e57a1..54936a3 100644
--- a/make/platform/darwin_fat.mk
+++ b/make/platform/darwin_fat.mk
@@ -24,7 +24,7 @@
 Description := Target for building universal libraries for Darwin.
 
 Configs := Debug Release Profile
-UniversalArchs := i386 ppc x86_64
+UniversalArchs := i386 x86_64
 
 # Platform Options
 #
@@ -37,7 +37,7 @@
 #   <Option Name>.<Arch Name> := ...
 #   <Option Name>.<Config Name>.<Arch Name> := ...
 
-CC := gcc
+CC := clang
 
 CFLAGS := -Wall -Werror
 CFLAGS.Debug := $(CFLAGS) -g
diff --git a/make/platform/multi_arch.mk b/make/platform/multi_arch.mk
index eebc7b2..fe6ac4b 100644
--- a/make/platform/multi_arch.mk
+++ b/make/platform/multi_arch.mk
@@ -5,8 +5,7 @@
 Arch := i386
 Arch.m64 := x86_64
 
-CC := gcc
-CC.m32 := clang
+CC := clang
 
 CFLAGS := -Wall -Werror
 CFLAGS.m32 := $(CFLAGS) -m32 -O3
diff --git a/make/subdir.mk b/make/subdir.mk
index 900f7e6..5b3c1f0 100644
--- a/make/subdir.mk
+++ b/make/subdir.mk
@@ -20,7 +20,8 @@
 
 # The list of variables which are intended to be overridden in a subdirectory
 # makefile.
-RequiredSubdirVariables := SubDirs ObjNames Implementation Dependencies
+RequiredSubdirVariables := \
+	ModuleName SubDirs ObjNames Implementation Dependencies
 OptionalSubdirVariables := OnlyArchs OnlyConfigs
 
 # Template: subdir_traverse_template subdir
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
deleted file mode 100644
index eac7608..0000000
--- a/test/CMakeLists.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-PROJECT( tests C )
-
-SET( CompilerRT_LIBRARY CompilerRT )
-INCLUDE_DIRECTORIES(${CompilerRT_SOURCE_DIR}/lib)
-
-# create test library
-# add_library(${CompilerRT_LIBRARY} STATIC support.c cmdline.c)
-TARGET_LINK_LIBRARIES( ${CompilerRT_LIBRARY} )
-
-# FIXME: We should check Optmized versions of CompilerRT-* here also.
-SET( TEST_TARGET_LIBRARIES ${CompilerRT_LIBRARY}-Generic )
-
-INCLUDE( MacroAddCheckTest )
-# create tests
-# MACRO_ADD_CHECK_TEST( foo foo.c ${TEST_TARGET_LIBRARIES} )
-
-
-# Create Unit/ tests
-
-MACRO_ADD_CHECK_TEST( ashldi3_test Unit/ashldi3_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( gcc_personality_test Unit/gcc_personality_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( udivmodti4_test Unit/udivmodti4_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( negvsi2_test Unit/negvsi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixdfdi_test Unit/fixdfdi_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( mulvsi3_test Unit/mulvsi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixdfti_test Unit/fixdfti_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( muldc3_test Unit/muldc3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( popcountdi2_test Unit/popcountdi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( negti2_test Unit/negti2_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( divsc3_test Unit/divsc3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( cmpti2_test Unit/cmpti2_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( trampoline_setup_test Unit/trampoline_setup_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( mulvti3_test Unit/mulvti3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunsxfdi_test Unit/fixunsxfdi_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunsxfti_test Unit/fixunsxfti_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( paritydi2_test Unit/paritydi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( negvti2_test Unit/negvti2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( divtc3_test Unit/divtc3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ucmpti2_test Unit/ucmpti2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( multc3_test Unit/multc3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatdixf_test Unit/floatdixf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( popcountti2_test Unit/popcountti2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( negdi2_test Unit/negdi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatdidf_test Unit/floatdidf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunstfdi_test Unit/fixunstfdi_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( shlti3_test Unit/ashlti3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( enable_execute_stack_test Unit/enable_execute_stack_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatundixf_test Unit/floatundixf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( udivmoddi4_test Unit/udivmoddi4_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( paritysi2_test Unit/paritysi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatundidf_test Unit/floatundidf_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( divdc3_test Unit/divdc3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatuntisf_test Unit/floatuntisf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ucmpdi2_test Unit/ucmpdi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( powixf2_test Unit/powixf2_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( mulsc3_test Unit/mulsc3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( popcountsi2_test Unit/popcountsi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( cmpdi2_test Unit/cmpdi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floattisf_test Unit/floattisf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( mulvdi3_test Unit/mulvdi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunssfdi_test Unit/fixunssfdi_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunsdfsi_test Unit/fixunsdfsi_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunssfti_test Unit/fixunssfti_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( parityti2_test Unit/parityti2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( negvdi2_test Unit/negvdi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( moddi3_test Unit/moddi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( clear_cache_test Unit/clear_cache_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunsxfsi_test Unit/fixunsxfsi_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( subvdi3_test Unit/subvdi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( multi3_test Unit/multi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( addvdi3_test Unit/addvdi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( clzti2_test Unit/clzti2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( umoddi3_test Unit/umoddi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( absvsi2_test Unit/absvsi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ctzdi2_test Unit/ctzdi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( powitf2_test Unit/powitf2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixsfdi_test Unit/fixsfdi_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ffsti2_test Unit/ffsti2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( divdi3_test Unit/divdi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixsfti_test Unit/fixsfti_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( absvti2_test Unit/absvti2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ashrti3_test Unit/ashrti3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( powisf2_test Unit/powisf2_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( mulxc3_test Unit/mulxc3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( lshrdi3_test Unit/lshrdi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( udivdi3_test Unit/udivdi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( clzsi2_test Unit/clzsi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( addvti3_test Unit/addvti3_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( udivsi3_test Unit/udivsi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( clzdi2_test Unit/clzdi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatuntidf_test Unit/floatuntidf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( umodti3_test Unit/umodti3_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( divxc3_test Unit/divxc3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ctzti2_test Unit/ctzti2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatuntixf_test Unit/floatuntixf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( powidf2_test Unit/powidf2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floattidf_test Unit/floattidf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( modti3_test Unit/modti3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunssfsi_test Unit/fixunssfsi_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunsdfdi_test Unit/fixunsdfdi_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( divsi3_test Unit/divsi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floattixf_test Unit/floattixf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixunsdfti_test Unit/fixunsdfti_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( subvti3_test Unit/subvti3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( muldi3_test Unit/muldi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ctzsi2_test Unit/ctzsi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( absvdi2_test Unit/absvdi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ashrdi3_test Unit/ashrdi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( lshrti3_test Unit/lshrti3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatdisf_test Unit/floatdisf_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( addvsi3_test Unit/addvsi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( udivti3_test Unit/udivti3_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( umodsi3_test Unit/umodsi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( subvsi3_test Unit/subvsi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixxfdi_test Unit/fixxfdi_test.c ${TEST_TARGET_LIBRARIES} )
-# MACRO_ADD_CHECK_TEST( modsi3_test Unit/modsi3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( fixxfti_test Unit/fixxfti_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ffsdi2_test Unit/ffsdi2_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( divti3_test Unit/divti3_test.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatundisf_test Unit/floatundisf_test.c ${TEST_TARGET_LIBRARIES} )
-
-#ADD_SUBDIRECTORY( timing )
diff --git a/test/Unit/clear_cache_test.c b/test/Unit/clear_cache_test.c
index 5267239..3507fd8 100644
--- a/test/Unit/clear_cache_test.c
+++ b/test/Unit/clear_cache_test.c
@@ -11,11 +11,20 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdint.h>
+#if defined(_WIN32)
+#include <windows.h>
+#include <stdlib.h> /* exit() */
+void __clear_cache(void* start, void* end) { /* test-local Windows stand-in */
+    SIZE_T len = (char*)end - (char*)start; /* void* arithmetic is not ISO C */
+    if (!FlushInstructionCache(GetCurrentProcess(), start, len)) exit(1);
+}
+#else
 #include <sys/mman.h>
-
-
-
 extern void __clear_cache(void* start, void* end);
+#endif
+
+
+
 
 typedef int (*pfunc)(void);
 
@@ -38,21 +47,29 @@
     // make executable the page containing execution_buffer 
     char* start = (char*)((uintptr_t)execution_buffer & (-4095));
     char* end = (char*)((uintptr_t)(&execution_buffer[128+4096]) & (-4095));
-    if ( mprotect(start, end-start, PROT_READ|PROT_WRITE|PROT_EXEC) != 0 )
+#if defined(_WIN32)
+    DWORD dummy_oldProt;
+    MEMORY_BASIC_INFORMATION b;
+    if (!VirtualQuery(start, &b, sizeof(b)))
+        return 1;
+    if (!VirtualProtect(b.BaseAddress, b.RegionSize, PAGE_EXECUTE_READWRITE, &b.Protect))
+#else
+    if (mprotect(start, end-start, PROT_READ|PROT_WRITE|PROT_EXEC) != 0)
+#endif
         return 1;
 
     // verify you can copy and execute a function
     memcpy(execution_buffer, (void *)(uintptr_t)&func1, 128);
     __clear_cache(execution_buffer, &execution_buffer[128]);
     pfunc f1 = (pfunc)(uintptr_t)execution_buffer;
-    if ( (*f1)() != 1 )
+    if ((*f1)() != 1)
         return 1;
 
     // verify you can overwrite a function with another
     memcpy(execution_buffer, (void *)(uintptr_t)&func2, 128);
     __clear_cache(execution_buffer, &execution_buffer[128]);
     pfunc f2 = (pfunc)(uintptr_t)execution_buffer;
-    if ( (*f2)() != 2 )
+    if ((*f2)() != 2)
         return 1;
 
     return 0;
diff --git a/test/Unit/enable_execute_stack_test.c b/test/Unit/enable_execute_stack_test.c
index ae4c320..c0f67b3 100644
--- a/test/Unit/enable_execute_stack_test.c
+++ b/test/Unit/enable_execute_stack_test.c
@@ -11,12 +11,27 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdint.h>
+#if defined(_WIN32)
+#include <windows.h>
+#include <stdlib.h> /* exit() */
+void __clear_cache(void* start, void* end) { /* test-local Windows stand-in */
+    SIZE_T len = (char*)end - (char*)start; /* void* arithmetic is not ISO C */
+    if (!FlushInstructionCache(GetCurrentProcess(), start, len)) exit(1);
+}
+void __enable_execute_stack(void *addr)
+{
+    /* Make the memory region containing addr RWX; exit(1) if that fails. */
+    MEMORY_BASIC_INFORMATION region;
+    if (!VirtualQuery(addr, &region, sizeof(region)) ||
+        !VirtualProtect(region.BaseAddress, region.RegionSize,
+                        PAGE_EXECUTE_READWRITE, &region.Protect))
+        exit(1);
+}
+#else
 #include <sys/mman.h>
-
-
-
 extern void __clear_cache(void* start, void* end);
 extern void __enable_execute_stack(void* addr);
+#endif
 
 typedef int (*pfunc)(void);
 
@@ -43,14 +58,14 @@
     memcpy(execution_buffer, (void *)(uintptr_t)&func1, 128);
     __clear_cache(execution_buffer, &execution_buffer[128]);
     pfunc f1 = (pfunc)(uintptr_t)execution_buffer;
-    if ( (*f1)() != 1 )
+    if ((*f1)() != 1)
         return 1;
 
     // verify you can overwrite a function with another
     memcpy(execution_buffer, (void *)(uintptr_t)&func2, 128);
     __clear_cache(execution_buffer, &execution_buffer[128]);
     pfunc f2 = (pfunc)(uintptr_t)execution_buffer;
-    if ( (*f2)() != 2 )
+    if ((*f2)() != 2)
         return 1;
 
     return 0;
diff --git a/test/Unit/endianness.h b/test/Unit/endianness.h
index 6f9f77b..669e6f1 100644
--- a/test/Unit/endianness.h
+++ b/test/Unit/endianness.h
@@ -36,7 +36,7 @@
 
 /* .. */
 
-#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonflyBSD__)
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonflyBSD__) || defined(__minix)
 #include <sys/endian.h>
 
 #if _BYTE_ORDER == _BIG_ENDIAN
diff --git a/test/Unit/mulodi4_test.c b/test/Unit/mulodi4_test.c
new file mode 100644
index 0000000..10a0eaa
--- /dev/null
+++ b/test/Unit/mulodi4_test.c
@@ -0,0 +1,178 @@
+//===-- mulodi4_test.c - Test __mulodi4 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __mulodi4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdio.h>
+
+extern di_int __mulodi4(di_int a, di_int b, int* overflow);
+// Runs one __mulodi4 case; returns 1 (after printing why) on any mismatch.
+int test__mulodi4(di_int a, di_int b, di_int expected, int expected_overflow)
+{
+    int ov;
+    di_int x = __mulodi4(a, b, &ov);
+    if (ov != expected_overflow)  // wrong flag: report it and fail below
+        printf("error in __mulodi4: overflow=%d expected=%d\n",
+               ov, expected_overflow);
+    else if (expected_overflow || x == expected)
+        return 0;  // product is only compared when no overflow is expected
+    else
+        printf("error in __mulodi4: 0x%llX * 0x%llX = 0x%llX (overflow=%d), "
+               "expected 0x%llX (overflow=%d)\n",
+               a, b, x, ov, expected, expected_overflow);
+    return 1;
+}
+// Runs every __mulodi4 case in order; exits 1 at the first reported failure.
+int main()
+{
+    if (test__mulodi4(0, 0, 0, 0))  // zero times a non-negative value
+        return 1;
+    if (test__mulodi4(0, 1, 0, 0))
+        return 1;
+    if (test__mulodi4(1, 0, 0, 0))
+        return 1;
+    if (test__mulodi4(0, 10, 0, 0))
+        return 1;
+    if (test__mulodi4(10, 0, 0, 0))
+        return 1;
+    if (test__mulodi4(0, 81985529216486895LL, 0, 0))
+        return 1;
+    if (test__mulodi4(81985529216486895LL, 0, 0, 0))
+        return 1;
+    // zero times a negative value
+    if (test__mulodi4(0, -1, 0, 0))
+        return 1;
+    if (test__mulodi4(-1, 0, 0, 0))
+        return 1;
+    if (test__mulodi4(0, -10, 0, 0))
+        return 1;
+    if (test__mulodi4(-10, 0, 0, 0))
+        return 1;
+    if (test__mulodi4(0, -81985529216486895LL, 0, 0))
+        return 1;
+    if (test__mulodi4(-81985529216486895LL, 0, 0, 0))
+        return 1;
+    // one times a positive value
+    if (test__mulodi4(1, 1, 1, 0))
+        return 1;
+    if (test__mulodi4(1, 10, 10, 0))
+        return 1;
+    if (test__mulodi4(10, 1, 10, 0))
+        return 1;
+    if (test__mulodi4(1, 81985529216486895LL, 81985529216486895LL, 0))
+        return 1;
+    if (test__mulodi4(81985529216486895LL, 1, 81985529216486895LL, 0))
+        return 1;
+    // one times a negative value
+    if (test__mulodi4(1, -1, -1, 0))
+        return 1;
+    if (test__mulodi4(1, -10, -10, 0))
+        return 1;
+    if (test__mulodi4(-10, 1, -10, 0))
+        return 1;
+    if (test__mulodi4(1, -81985529216486895LL, -81985529216486895LL, 0))
+        return 1;
+    if (test__mulodi4(-81985529216486895LL, 1, -81985529216486895LL, 0))
+        return 1;
+    // 3037000499 squared still fits; all four sign combinations
+    if (test__mulodi4(3037000499LL, 3037000499LL, 9223372030926249001LL, 0))
+        return 1;
+    if (test__mulodi4(-3037000499LL, 3037000499LL, -9223372030926249001LL, 0))
+        return 1;
+    if (test__mulodi4(3037000499LL, -3037000499LL, -9223372030926249001LL, 0))
+        return 1;
+    if (test__mulodi4(-3037000499LL, -3037000499LL, 9223372030926249001LL, 0))
+        return 1;
+    // (2^42 - 1) * 2^21 still fits; all four sign combinations
+    if (test__mulodi4(4398046511103LL, 2097152LL, 9223372036852678656LL, 0))
+        return 1;
+    if (test__mulodi4(-4398046511103LL, 2097152LL, -9223372036852678656LL, 0))
+        return 1;
+    if (test__mulodi4(4398046511103LL, -2097152LL, -9223372036852678656LL, 0))
+        return 1;
+    if (test__mulodi4(-4398046511103LL, -2097152LL, 9223372036852678656LL, 0))
+        return 1;
+    // the same four products with the operands swapped
+    if (test__mulodi4(2097152LL, 4398046511103LL, 9223372036852678656LL, 0))
+        return 1;
+    if (test__mulodi4(-2097152LL, 4398046511103LL, -9223372036852678656LL, 0))
+        return 1;
+    if (test__mulodi4(2097152LL, -4398046511103LL, -9223372036852678656LL, 0))
+        return 1;
+    if (test__mulodi4(-2097152LL, -4398046511103LL, 9223372036852678656LL, 0))
+        return 1;
+    // 0x7FFFFFFFFFFFFFFF times -2..2: overflow expected only for -2 and 2
+     if (test__mulodi4(0x7FFFFFFFFFFFFFFFLL, -2, 2, 1))
+         return 1;
+     if (test__mulodi4(-2, 0x7FFFFFFFFFFFFFFFLL, 2, 1))
+         return 1;
+    if (test__mulodi4(0x7FFFFFFFFFFFFFFFLL, -1, 0x8000000000000001LL, 0))
+        return 1;
+    if (test__mulodi4(-1, 0x7FFFFFFFFFFFFFFFLL, 0x8000000000000001LL, 0))
+        return 1;
+    if (test__mulodi4(0x7FFFFFFFFFFFFFFFLL, 0, 0, 0))
+        return 1;
+    if (test__mulodi4(0, 0x7FFFFFFFFFFFFFFFLL, 0, 0))
+        return 1;
+    if (test__mulodi4(0x7FFFFFFFFFFFFFFFLL, 1, 0x7FFFFFFFFFFFFFFFLL, 0))
+        return 1;
+    if (test__mulodi4(1, 0x7FFFFFFFFFFFFFFFLL, 0x7FFFFFFFFFFFFFFFLL, 0))
+        return 1;
+     if (test__mulodi4(0x7FFFFFFFFFFFFFFFLL, 2, 0x8000000000000001LL, 1))
+         return 1;
+     if (test__mulodi4(2, 0x7FFFFFFFFFFFFFFFLL, 0x8000000000000001LL, 1))
+         return 1;
+    // 0x8000000000000000 times -2..2: overflow expected for -2, -1 and 2
+     if (test__mulodi4(0x8000000000000000LL, -2, 0x8000000000000000LL, 1))
+         return 1;
+     if (test__mulodi4(-2, 0x8000000000000000LL, 0x8000000000000000LL, 1))
+         return 1;
+     if (test__mulodi4(0x8000000000000000LL, -1, 0x8000000000000000LL, 1))
+         return 1;
+     if (test__mulodi4(-1, 0x8000000000000000LL, 0x8000000000000000LL, 1))
+         return 1;
+    if (test__mulodi4(0x8000000000000000LL, 0, 0, 0))
+        return 1;
+    if (test__mulodi4(0, 0x8000000000000000LL, 0, 0))
+        return 1;
+    if (test__mulodi4(0x8000000000000000LL, 1, 0x8000000000000000LL, 0))
+        return 1;
+    if (test__mulodi4(1, 0x8000000000000000LL, 0x8000000000000000LL, 0))
+        return 1;
+     if (test__mulodi4(0x8000000000000000LL, 2, 0x8000000000000000LL, 1))
+         return 1;
+     if (test__mulodi4(2, 0x8000000000000000LL, 0x8000000000000000LL, 1))
+         return 1;
+    // 0x8000000000000001 times -2..2: overflow expected only for -2 and 2
+     if (test__mulodi4(0x8000000000000001LL, -2, 0x8000000000000001LL, 1))
+         return 1;
+     if (test__mulodi4(-2, 0x8000000000000001LL, 0x8000000000000001LL, 1))
+         return 1;
+    if (test__mulodi4(0x8000000000000001LL, -1, 0x7FFFFFFFFFFFFFFFLL, 0))
+        return 1;
+    if (test__mulodi4(-1, 0x8000000000000001LL, 0x7FFFFFFFFFFFFFFFLL, 0))
+        return 1;
+    if (test__mulodi4(0x8000000000000001LL, 0, 0, 0))
+        return 1;
+    if (test__mulodi4(0, 0x8000000000000001LL, 0, 0))
+        return 1;
+    if (test__mulodi4(0x8000000000000001LL, 1, 0x8000000000000001LL, 0))
+        return 1;
+    if (test__mulodi4(1, 0x8000000000000001LL, 0x8000000000000001LL, 0))
+        return 1;
+     if (test__mulodi4(0x8000000000000001LL, 2, 0x8000000000000000LL, 1))
+         return 1;
+     if (test__mulodi4(2, 0x8000000000000001LL, 0x8000000000000000LL, 1))
+         return 1;
+
+    return 0;
+}
diff --git a/test/Unit/mulosi4_test.c b/test/Unit/mulosi4_test.c
new file mode 100644
index 0000000..fc509db
--- /dev/null
+++ b/test/Unit/mulosi4_test.c
@@ -0,0 +1,156 @@
+//===-- mulosi4_test.c - Test __mulosi4 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __mulosi4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdio.h>
+
+// Returns: a * b
+
+// Effects: sets overflow if a * b overflows
+
+si_int __mulosi4(si_int a, si_int b, int *overflow);
+// Runs one __mulosi4 case; returns 1 (after printing why) on any mismatch.
+int test__mulosi4(si_int a, si_int b, si_int expected, int expected_overflow)
+{
+    int ov;
+    si_int x = __mulosi4(a, b, &ov);
+    if (ov != expected_overflow)  // wrong flag: report it and fail below
+        printf("error in __mulosi4: overflow=%d expected=%d\n",
+               ov, expected_overflow);
+    else if (expected_overflow || x == expected)
+        return 0;  // product is only compared when no overflow is expected
+    else
+        printf("error in __mulosi4: 0x%X * 0x%X = 0x%X (overflow=%d), "
+               "expected 0x%X (overflow=%d)\n",
+               a, b, x, ov, expected, expected_overflow);
+    return 1;
+}
+
+// Runs every __mulosi4 case in order; exits 1 at the first reported failure.
+int main()
+{
+    if (test__mulosi4(0, 0, 0, 0))  // zero times a non-negative value
+        return 1;
+    if (test__mulosi4(0, 1, 0, 0))
+        return 1;
+    if (test__mulosi4(1, 0, 0, 0))
+        return 1;
+    if (test__mulosi4(0, 10, 0, 0))
+        return 1;
+    if (test__mulosi4(10, 0, 0, 0))
+        return 1;
+    if (test__mulosi4(0, 0x1234567, 0, 0))
+        return 1;
+    if (test__mulosi4(0x1234567, 0, 0, 0))
+        return 1;
+    // zero times a negative value
+    if (test__mulosi4(0, -1, 0, 0))
+        return 1;
+    if (test__mulosi4(-1, 0, 0, 0))
+        return 1;
+    if (test__mulosi4(0, -10, 0, 0))
+        return 1;
+    if (test__mulosi4(-10, 0, 0, 0))
+        return 1;
+    if (test__mulosi4(0, -0x1234567, 0, 0))
+        return 1;
+    if (test__mulosi4(-0x1234567, 0, 0, 0))
+        return 1;
+    // one times a positive value
+    if (test__mulosi4(1, 1, 1, 0))
+        return 1;
+    if (test__mulosi4(1, 10, 10, 0))
+        return 1;
+    if (test__mulosi4(10, 1, 10, 0))
+        return 1;
+    if (test__mulosi4(1, 0x1234567, 0x1234567, 0))
+        return 1;
+    if (test__mulosi4(0x1234567, 1, 0x1234567, 0))
+        return 1;
+    // one times a negative value
+    if (test__mulosi4(1, -1, -1, 0))
+        return 1;
+    if (test__mulosi4(1, -10, -10, 0))
+        return 1;
+    if (test__mulosi4(-10, 1, -10, 0))
+        return 1;
+    if (test__mulosi4(1, -0x1234567, -0x1234567, 0))
+        return 1;
+    if (test__mulosi4(-0x1234567, 1, -0x1234567, 0))
+        return 1;
+    // 0x7FFFFFFF times -2..2: overflow expected only for -2 and 2
+     if (test__mulosi4(0x7FFFFFFF, -2, 0x80000001, 1))
+         return 1;
+     if (test__mulosi4(-2, 0x7FFFFFFF, 0x80000001, 1))
+         return 1;
+    if (test__mulosi4(0x7FFFFFFF, -1, 0x80000001, 0))
+        return 1;
+    if (test__mulosi4(-1, 0x7FFFFFFF, 0x80000001, 0))
+        return 1;
+    if (test__mulosi4(0x7FFFFFFF, 0, 0, 0))
+        return 1;
+    if (test__mulosi4(0, 0x7FFFFFFF, 0, 0))
+        return 1;
+    if (test__mulosi4(0x7FFFFFFF, 1, 0x7FFFFFFF, 0))
+        return 1;
+    if (test__mulosi4(1, 0x7FFFFFFF, 0x7FFFFFFF, 0))
+        return 1;
+     if (test__mulosi4(0x7FFFFFFF, 2, 0x80000001, 1))
+         return 1;
+     if (test__mulosi4(2, 0x7FFFFFFF, 0x80000001, 1))
+         return 1;
+    // 0x80000000 times -2..2: overflow expected for -2, -1 and 2
+     if (test__mulosi4(0x80000000, -2, 0x80000000, 1))
+         return 1;
+     if (test__mulosi4(-2, 0x80000000, 0x80000000, 1))
+         return 1;
+     if (test__mulosi4(0x80000000, -1, 0x80000000, 1))
+         return 1;
+     if (test__mulosi4(-1, 0x80000000, 0x80000000, 1))
+         return 1;
+    if (test__mulosi4(0x80000000, 0, 0, 0))
+        return 1;
+    if (test__mulosi4(0, 0x80000000, 0, 0))
+        return 1;
+    if (test__mulosi4(0x80000000, 1, 0x80000000, 0))
+        return 1;
+    if (test__mulosi4(1, 0x80000000, 0x80000000, 0))
+        return 1;
+     if (test__mulosi4(0x80000000, 2, 0x80000000, 1))
+         return 1;
+     if (test__mulosi4(2, 0x80000000, 0x80000000, 1))
+         return 1;
+    // 0x80000001 times -2..2: overflow expected only for -2 and 2
+     if (test__mulosi4(0x80000001, -2, 0x80000001, 1))
+         return 1;
+     if (test__mulosi4(-2, 0x80000001, 0x80000001, 1))
+         return 1;
+    if (test__mulosi4(0x80000001, -1, 0x7FFFFFFF, 0))
+        return 1;
+    if (test__mulosi4(-1, 0x80000001, 0x7FFFFFFF, 0))
+        return 1;
+    if (test__mulosi4(0x80000001, 0, 0, 0))
+        return 1;
+    if (test__mulosi4(0, 0x80000001, 0, 0))
+        return 1;
+    if (test__mulosi4(0x80000001, 1, 0x80000001, 0))
+        return 1;
+    if (test__mulosi4(1, 0x80000001, 0x80000001, 0))
+        return 1;
+     if (test__mulosi4(0x80000001, 2, 0x80000000, 1))
+         return 1;
+     if (test__mulosi4(2, 0x80000001, 0x80000000, 1))
+         return 1;
+
+    return 0;
+}
diff --git a/test/Unit/muloti4_test.c b/test/Unit/muloti4_test.c
new file mode 100644
index 0000000..44abddf
--- /dev/null
+++ b/test/Unit/muloti4_test.c
@@ -0,0 +1,280 @@
+//===-- muloti4_test.c - Test __muloti4 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __muloti4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+#include <stdio.h>
+
+// Returns: a * b
+
+// Effects: sets overflow if a * b overflows
+
+ti_int __muloti4(ti_int a, ti_int b, int *overflow);
+
+// Runs one __muloti4 case.
+//   a, b               operands
+//   expected           product required when no overflow is expected
+//   expected_overflow  overflow flag __muloti4 must report
+// Prints a diagnostic and returns 1 on any mismatch; returns 0 when the
+// case passes.
+int test__muloti4(ti_int a, ti_int b, ti_int expected, int expected_overflow)
+{
+    int flag;
+    const ti_int product = __muloti4(a, b, &flag);
+    const int flag_differs = (flag != expected_overflow);
+
+    // Pass: the overflow flag agrees and, unless overflow was expected, so
+    // does the product (the product is ignored when overflow is expected).
+    if (!flag_differs && (expected_overflow || product == expected))
+        return 0;
+
+    // Failure path only from here on.  View every 128-bit value through
+    // twords so its two 64-bit halves can be handed to printf.
+    twords lhs;
+    twords rhs;
+    twords got;
+    twords want;
+    lhs.all = a;
+    rhs.all = b;
+    got.all = product;
+    want.all = expected;
+
+    // A disagreeing flag is reported on its own line before the dump.
+    if (flag_differs)
+        printf("error in __muloti4: overflow=%d expected=%d\n",
+               flag, expected_overflow);
+
+    // Operands, actual product and expected product, high half first.
+    printf("error in __muloti4: 0x%.16llX%.16llX * 0x%.16llX%.16llX = "
+           "0x%.16llX%.16llX, expected 0x%.16llX%.16llX\n",
+           lhs.s.high, lhs.s.low, rhs.s.high, rhs.s.low,
+           got.s.high, got.s.low, want.s.high, want.s.low);
+    return 1;
+}
+
+#endif
+// x86_64: run each case, exit 1 at the first failure; else print "skipped".
+int main()
+{
+#if __x86_64
+    if (test__muloti4(0, 0, 0, 0))  // zero times a non-negative value
+        return 1;
+    if (test__muloti4(0, 1, 0, 0))
+        return 1;
+    if (test__muloti4(1, 0, 0, 0))
+        return 1;
+    if (test__muloti4(0, 10, 0, 0))
+        return 1;
+    if (test__muloti4(10, 0, 0, 0))
+        return 1;
+    if (test__muloti4(0, 81985529216486895LL, 0, 0))
+        return 1;
+    if (test__muloti4(81985529216486895LL, 0, 0, 0))
+        return 1;
+    // zero times a negative value
+    if (test__muloti4(0, -1, 0, 0))
+        return 1;
+    if (test__muloti4(-1, 0, 0, 0))
+        return 1;
+    if (test__muloti4(0, -10, 0, 0))
+        return 1;
+    if (test__muloti4(-10, 0, 0, 0))
+        return 1;
+    if (test__muloti4(0, -81985529216486895LL, 0, 0))
+        return 1;
+    if (test__muloti4(-81985529216486895LL, 0, 0, 0))
+        return 1;
+    // one times a positive value
+    if (test__muloti4(1, 1, 1, 0))
+        return 1;
+    if (test__muloti4(1, 10, 10, 0))
+        return 1;
+    if (test__muloti4(10, 1, 10, 0))
+        return 1;
+    if (test__muloti4(1, 81985529216486895LL, 81985529216486895LL, 0))
+        return 1;
+    if (test__muloti4(81985529216486895LL, 1, 81985529216486895LL, 0))
+        return 1;
+    // one times a negative value
+    if (test__muloti4(1, -1, -1, 0))
+        return 1;
+    if (test__muloti4(1, -10, -10, 0))
+        return 1;
+    if (test__muloti4(-10, 1, -10, 0))
+        return 1;
+    if (test__muloti4(1, -81985529216486895LL, -81985529216486895LL, 0))
+        return 1;
+    if (test__muloti4(-81985529216486895LL, 1, -81985529216486895LL, 0))
+        return 1;
+    // 3037000499 squared; all four sign combinations, no overflow expected
+    if (test__muloti4(3037000499LL, 3037000499LL, 9223372030926249001LL, 0))
+        return 1;
+    if (test__muloti4(-3037000499LL, 3037000499LL, -9223372030926249001LL, 0))
+        return 1;
+    if (test__muloti4(3037000499LL, -3037000499LL, -9223372030926249001LL, 0))
+        return 1;
+    if (test__muloti4(-3037000499LL, -3037000499LL, 9223372030926249001LL, 0))
+        return 1;
+    // (2^42 - 1) * 2^21; all four sign combinations, no overflow expected
+    if (test__muloti4(4398046511103LL, 2097152LL, 9223372036852678656LL, 0))
+        return 1;
+    if (test__muloti4(-4398046511103LL, 2097152LL, -9223372036852678656LL, 0))
+        return 1;
+    if (test__muloti4(4398046511103LL, -2097152LL, -9223372036852678656LL, 0))
+        return 1;
+    if (test__muloti4(-4398046511103LL, -2097152LL, 9223372036852678656LL, 0))
+        return 1;
+    // the same four products with the operands swapped
+    if (test__muloti4(2097152LL, 4398046511103LL, 9223372036852678656LL, 0))
+        return 1;
+    if (test__muloti4(-2097152LL, 4398046511103LL, -9223372036852678656LL, 0))
+        return 1;
+    if (test__muloti4(2097152LL, -4398046511103LL, -9223372036852678656LL, 0))
+        return 1;
+    if (test__muloti4(-2097152LL, -4398046511103LL, 9223372036852678656LL, 0))
+        return 1;
+    // operands built with make_ti (presumably high, low); no overflow expected
+    if (test__muloti4(make_ti(0x00000000000000B5LL, 0x04F333F9DE5BE000LL),
+                      make_ti(0x0000000000000000LL, 0x00B504F333F9DE5BLL),
+                      make_ti(0x7FFFFFFFFFFFF328LL, 0xDF915DA296E8A000LL), 0))
+        return 1;
+    // make_ti(0x7FFF..., 0xFFFF...) times -2..2: overflow only for -2 and 2
+     if (test__muloti4(make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                       -2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000001LL), 1))
+       return 1;
+     if (test__muloti4(-2,
+                       make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                       make_ti(0x8000000000000000LL, 0x0000000000000001LL), 1))
+         return 1;
+    if (test__muloti4(make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                      -1,
+                      make_ti(0x8000000000000000LL, 0x0000000000000001LL), 0))
+        return 1;
+    if (test__muloti4(-1,
+                      make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                      make_ti(0x8000000000000000LL, 0x0000000000000001LL), 0))
+        return 1;
+    if (test__muloti4(make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                      0,
+                      0, 0))
+        return 1;
+    if (test__muloti4(0,
+                      make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                      0, 0))
+        return 1;
+    if (test__muloti4(make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                      1,
+                      make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL), 0))
+        return 1;
+    if (test__muloti4(1,
+                      make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                      make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL), 0))
+        return 1;
+     if (test__muloti4(make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                       2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000001LL), 1))
+         return 1;
+     if (test__muloti4(2,
+                       make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                       make_ti(0x8000000000000000LL, 0x0000000000000001LL), 1))
+         return 1;
+    // make_ti(0x8000..., 0) times -2..2: overflow expected for -2, -1 and 2
+     if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                       -2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL), 1))
+         return 1;
+     if (test__muloti4(-2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL), 1))
+         return 1;
+     if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                       -1,
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL), 1))
+         return 1;
+     if (test__muloti4(-1,
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL), 1))
+         return 1;
+    if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                      0,
+                      0, 0))
+        return 1;
+    if (test__muloti4(0,
+                      make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                      0, 0))
+        return 1;
+    if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                      1,
+                      make_ti(0x8000000000000000LL, 0x0000000000000000LL), 0))
+        return 1;
+    if (test__muloti4(1,
+                      make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                      make_ti(0x8000000000000000LL, 0x0000000000000000LL), 0))
+        return 1;
+     if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                       2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL), 1))
+         return 1;
+     if (test__muloti4(2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL),
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL), 1))
+         return 1;
+    // make_ti(0x8000..., 1) times -2..2: overflow expected only for -2 and 2
+     if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                       -2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000001LL), 1))
+         return 1;
+     if (test__muloti4(-2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                       make_ti(0x8000000000000000LL, 0x0000000000000001LL), 1))
+         return 1;
+    if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                      -1,
+                      make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL), 0))
+        return 1;
+    if (test__muloti4(-1,
+                      make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                      make_ti(0x7FFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL), 0))
+        return 1;
+    if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                      0,
+                      0, 0))
+        return 1;
+    if (test__muloti4(0,
+                      make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                      0, 0))
+        return 1;
+    if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                      1,
+                      make_ti(0x8000000000000000LL, 0x0000000000000001LL), 0))
+        return 1;
+    if (test__muloti4(1,
+                      make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                      make_ti(0x8000000000000000LL, 0x0000000000000001LL), 0))
+        return 1;
+     if (test__muloti4(make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                       2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL), 1))
+         return 1;
+     if (test__muloti4(2,
+                       make_ti(0x8000000000000000LL, 0x0000000000000001LL),
+                       make_ti(0x8000000000000000LL, 0x0000000000000000LL), 1))
+         return 1;
+
+#else
+    printf("skipped\n");
+#endif
+    return 0;
+}
diff --git a/test/Unit/ppc/CMakeLists.txt b/test/Unit/ppc/CMakeLists.txt
deleted file mode 100644
index 45f1a26..0000000
--- a/test/Unit/ppc/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-SET( SRCS
- floatditf_test.c
- fixtfdi_test.c
- qsub_test.c
- qadd_test.c
- qmul_test.c
- qdiv_test.c
- floatunditf_test.c
- )
diff --git a/test/timing/CMakeLists.txt b/test/timing/CMakeLists.txt
deleted file mode 100644
index abc7d8f..0000000
--- a/test/timing/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# create tests
-MACRO_ADD_CHECK_TEST( lshrdi3 lshrdi3.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatundixf floatundixf.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatdixf floatdixf.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( umoddi3 umoddi3.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( udivdi3 udivdi3.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( negdi2 negdi2.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ashrdi3 ashrdi3.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( muldi3 muldi3.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( ashldi3 ashldi3.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( divdi3 divdi3.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatundisf floatundisf.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatdidf floatdidf.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatdisf floatdisf.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( moddi3 moddi3.c ${TEST_TARGET_LIBRARIES} )
-MACRO_ADD_CHECK_TEST( floatundidf floatundidf.c ${TEST_TARGET_LIBRARIES} )
diff --git a/www/index.html b/www/index.html
index 7a7ef7c..85e5a83 100644
--- a/www/index.html
+++ b/www/index.html
@@ -26,6 +26,22 @@
   <p>All of the code in the compiler-rt project is <a 
      href="http://llvm.org/docs/DeveloperPolicy.html#license">dual licensed</a>
      under the MIT license and the UIUC License (a BSD-like license).</p>
+
+  <!--=====================================================================-->
+  <h2 id="users">Clients</h2>
+  <!--=====================================================================-->
+
+  <p>Currently compiler-rt is primarily used by
+    the <a href="http://clang.llvm.org">Clang</a>
+    and <a href="http://llvm.org">LLVM</a> projects as the implementation for
+    the runtime compiler support libraries. The library currently provides both
+    the low-level target-specific hooks required by code generation, as well as
+    additional modules for supporting the runtime requirements of features like
+    code coverage, profiling, or address sanitizer (ASAN) instrumentation.</p>
+
+  <p>For more information on using compiler-rt with Clang, please see the Clang
+    <a href="http://clang.llvm.org/get_started.html">Getting Started</a>
+    page.</p>
   
   <!--=====================================================================-->
   <h2 id="goals">Goals</h2>